File: lexer.h

package info (click to toggle)
kdevelop 4%3A4.3.1-3
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 18,844 kB
  • sloc: cpp: 91,758; python: 1,095; lex: 422; ruby: 120; sh: 114; xml: 42; makefile: 38
file content (286 lines) | stat: -rw-r--r-- 8,017 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
/* This file is part of KDevelop
    Copyright 2002-2005 Roberto Raggi <roberto@kdevelop.org>
    Copyright 2007-2008 David Nolden <david.nolden.kdevelop@art-master.de>

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public
   License version 2 as published by the Free Software Foundation.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public License
   along with this library; see the file COPYING.LIB.  If not, write to
   the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
   Boston, MA 02110-1301, USA.
*/

#ifndef LEXER_H
#define LEXER_H

#include "symbol.h"
#include <cppparserexport.h>
#include <QtCore/QString>
#include <cstdlib>
#include <language/duchain/indexedstring.h>

#include <language/interfaces/iproblem.h>

// Forward declarations (Lexer itself is defined further down in this header).
struct NameSymbol;
class Lexer;
class Control;
class ParseSession;

/// Pointer to a Lexer member function that takes no arguments and returns
/// nothing; this is the element type of Lexer::s_scan_table and
/// Lexer::s_scan_keyword_table.
typedef void (Lexer::*scan_fun_ptr)();

/**A single token, as stored in a TokenStream.*/
class KDEVCPPPARSER_EXPORT Token
{
public:
  ///Kind of the token. @see TOKEN_KIND enum reference.
  int kind;
  ///Position of the token in the preprocessed buffer.
  uint position;
  ///Size of the token in the preprocessed buffer. Do not confuse this with symbolLength().
  uint size;
  ///Pointer to the parse session.
  const ParseSession* session;

  ///Symbol associated with the token. This only works if the token is a simple symbol
  ///consisting of exactly one identifier (not a comment); it does not work for
  ///operators like "->" or numbers like "50".
  KDevelop::IndexedString symbol() const;
  ///The index of the symbol associated with the token.
  ///The restrictions noted for @c symbol() apply here as well.
  uint symbolIndex() const;

  ///Text of the token as a QString. This always works, but is expensive.
  QString symbolString() const;
  ///NOTE(review): presumably the same text as symbolString(), returned as a QByteArray - confirm.
  QByteArray symbolByteArray() const;

  ///NOTE(review): presumably the length of the token's symbol text, as opposed to
  ///@c size, which is documented as the size in the preprocessed buffer - confirm.
  uint symbolLength() const;
};

/**Stream of tokens found by the lexer.
Works like a contiguously allocated array of @ref Token (the class derives
from QVector<Token>). All tokens are destructed when this stream is deleted.

The stream has a "cursor", which is simply an integer holding the offset
(index) of the token currently "observed", counted from the beginning of
the stream.

Code outside the class cannot copy or assign a stream: the copy constructor
and the assignment operator are private.

TODO: reuse some pool / container class for the token array
NOTE: token_count is actually the *size* of the token pool
      the last actually used token is lastToken
NOTE(review): neither token_count nor lastToken is declared in this class;
      the note above appears to describe an earlier implementation - confirm.
*/
class TokenStream : public QVector<Token>
{
private:
  // Private on purpose: a token stream must not be copied or assigned.
  TokenStream(const TokenStream &);
  void operator = (const TokenStream &);

public:
  /**Creates an empty token stream with the cursor at 0 and storage
  reserved for @p size tokens (1024 by default).
  The constructor is explicit so that an integer is never silently
  converted into a token stream.*/
  inline explicit TokenStream(uint size = 1024)
    : index(0)
  {
    reserve(size);
  }

  /**@return the token at position @p index.
  Note that the parameter hides the cursor member of the same name; the
  cursor is not consulted here.*/
  inline const Token &token(int index) const
  { return at(index); }

  /**@return the "cursor" - the offset (index) of the token
  currently "observed" from the beginning of the stream.*/
  inline uint cursor() const
  { return index; }

  /**Sets the cursor to the position @p i.*/
  inline void rewind(int i)
  { index = i; }

  /**Advances the cursor to the next token.
  @return the cursor value from before the advance (post-increment), i.e.
  the index of the token that was observed when the call was made.*/
  inline uint nextToken()
  { return index++; }

  /**@return the kind of the look-ahead (LA) token located @p i tokens
  after the cursor; with the default of 0 this is the token under the
  cursor itself.*/
  inline int lookAhead(uint i = 0) const
  { return at(index + i).kind; }

  /**@return the kind of the token at position @p i (independent of the cursor).*/
  inline int kind(uint i) const
  { return at(i).kind; }

  /**@return the buffer position recorded in the token at position @p i
  (its Token::position), independent of the cursor.*/
  inline uint position(uint i) const
  { return at(i).position; }

  /**
   * Split the right shift token at @p index into two distinct right angle brackets.
   *
   * Required to support 14.2/3 of the spec, see also:
   * http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2005/n1757.html
   */
  void splitRightShift(uint index);

private:
  /// The cursor: index of the token currently observed.
  uint index;
};

/**C++ Lexer.*/
class Lexer
{
public:
  /**
   * Constructor.
   *
   * \param control NOTE(review): presumably stored in the @c control member
   *        and used when problems are created via createProblem() - confirm
   *        in the implementation file.
   *
   * The constructor is explicit so that a Control pointer is never silently
   * converted into a Lexer.
   */
  explicit Lexer(Control *control);

  /**Finds the tokens for @p session.
   * NOTE(review): presumably this scans the contents held by the session and
   * fills the session's token stream - confirm in the implementation file.*/
  void tokenize(ParseSession* session);

  /// NOTE(review): presumably the session handed to the most recent tokenize() call - confirm.
  ParseSession* session;

private:
  void skipComment();
  /**Fills the scan table with method pointers.*/
  void initialize_scan_table();
  void scan_newline();
  void scan_white_spaces();
  void scan_identifier_or_keyword();
  void scan_identifier_or_literal();
  void scan_int_constant();
  void scan_char_constant();
  void scan_string_constant();
  void scan_raw_string_constant();
  void scan_raw_string_or_identifier();
  void scan_invalid_input();
  void scan_preprocessor();

  // keywords
  // NOTE(review): the numeric suffix presumably is the length of the
  // keywords each function recognizes - confirm in the implementation file.
  void scanKeyword0();
  void scanKeyword2();
  void scanKeyword3();
  void scanKeyword4();
  void scanKeyword5();
  void scanKeyword6();
  void scanKeyword7();
  void scanKeyword8();
  void scanKeyword9();
  void scanKeyword10();
  void scanKeyword11();
  void scanKeyword12();
  void scanKeyword13();
  void scanKeyword14();
  void scanKeyword16();

  // operators
  void scan_not();
  void scan_remainder();
  void scan_and();
  void scan_left_paren();
  void scan_right_paren();
  void scan_star();
  void scan_plus();
  void scan_comma();
  void scan_minus();
  void scan_dot();
  void scan_divide();
  void scan_colon();
  void scan_semicolon();
  void scan_less();
  void scan_equal();
  void scan_greater();
  void scan_question();
  void scan_left_bracket();
  void scan_right_bracket();
  void scan_xor();
  void scan_left_brace();
  void scan_or();
  void scan_right_brace();
  void scan_tilde();
  void scan_EOF();

  KDevelop::ProblemPointer createProblem() const;

private:
  Control *control;

  /**Cursor over the stream of uint entries that is being scanned.
   *
   * An entry whose upper 16 bits are all set (mask 0xffff0000) is treated as
   * a single character, kept in the low byte. Any other entry is not a
   * character; operator*() reports it as 'a'.
   * NOTE(review): non-character entries presumably are indices of
   * identifiers created by the preprocessor - confirm.
   */
  struct SpecialCursor {
    /// @return true if the raw entry under the cursor equals @p index.
    bool operator==(uint index) const {
      return *current == index;
    }
    /// @return true if the entry under the cursor is the encoded form of @p character.
    bool operator==(char character) const {
      return *current == (character | 0xffff0000);
    }
    /// @return true if the entry under the cursor encodes a single character.
    bool isChar() const {
      return ((*current) & 0xffff0000) == 0xffff0000;
    }
    /// @return the character under the cursor, or 'a' for a non-character entry.
    inline char operator*() const {
      if(isChar())
        return (char)*current;
      else
        return 'a'; //Return a valid character, because the identifiers created by the preprocessor are alpha-numerical
    }
    /// Moves the cursor one entry forward.
    void operator++() {
      ++current;
    }
    /// Moves the cursor by @p offset entries (may be negative).
    void operator+=(int offset) {
      current += offset;
    }
    /// @return true if the two cursors point at different entries.
    bool operator !=(const SpecialCursor& rhs) const {
      return current != rhs.current;
    }
    /// @return true if the cursor does not point at @p rhs.
    bool operator !=(const uint* rhs) const {
      return current != rhs;
    }
    /// Moves the cursor one entry backward.
    void operator--() {
      --current;
    }
    /// @return true if the cursor lies before @p end.
    bool operator<(const uint* end) const {
      return current < end;
    }

    /// @return the number of entries from @p rhs to this cursor; negative
    /// when this cursor lies before @p rhs.
    int operator -(const SpecialCursor& rhs) const {
      // Subtract the uint pointers directly: the result already is a signed
      // element count. Dividing a byte difference by sizeof(uint) would first
      // convert it to the unsigned size_t and mangle negative distances on
      // targets where size_t is not wider than int.
      return static_cast<int>(current - rhs.current);
    }

    /// @return the number of entries from @p base to this cursor.
    /// The cursor is expected not to lie before @p base.
    uint offsetIn(const uint* base) const {
      return static_cast<uint>(current - base);
    }

    /// @return a copy of this cursor moved by @p offset entries; this cursor is left unchanged.
    SpecialCursor operator +(int offset) const {
      SpecialCursor ret(*this);
      ret.current += offset;
      return ret;
    }

    // useful for debugging
    QString toString() const;

    /// The entry the cursor currently points at.
    uint* current;
  };

  SpecialCursor cursor;
  /// NOTE(review): presumably one past the last entry to scan - confirm.
  const uint* endCursor;
  /// NOTE(review): presumably the index of the token being produced - confirm.
  uint index;

  bool m_leaveSize; //Marks the current token that its size should not be automatically set
  bool m_canMergeComment; //Whether we may append new comments to the last encountered one
  bool m_firstInLine;   //Whether the next token is the first one in a line

  ///scan table contains pointers to the methods to scan for various token types
  static scan_fun_ptr s_scan_table[];
  static scan_fun_ptr s_scan_keyword_table[];
  /// NOTE(review): presumably records whether initialize_scan_table() has run - confirm.
  static bool s_initialized;
};

#endif // LEXER_H