1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
|
#pragma once
#include "SrcPos.h"
/**
 * A single token produced by the tokenizer: the token text together with the
 * source position at which the token starts.
 */
class Token {
public:
	// Text of this token.
	String token;

	// Position in the source where this token starts.
	SrcPos pos;

	// Create a token holding the text 'token' that starts at 'pos'.
	Token(const String &token, const SrcPos &pos)
		: token(token), pos(pos) {}

	// Compare against another token. Only the text is compared; 'pos' is ignored.
	bool operator ==(const Token &o) const {
		return token == o.token;
	}
	bool operator !=(const Token &o) const {
		return token != o.token;
	}

	// Compare the text of this token against a plain string.
	bool operator ==(const String &o) const {
		return token == o;
	}
	bool operator !=(const String &o) const {
		return token != o;
	}

	// True when the token text has size zero.
	bool empty() const {
		return token.size() == 0;
	}

	// Is this token a string? (Implemented outside this header.)
	bool isStr() const;

	// Extract the string value from the token. Only meaningful when isStr() holds.
	String strVal() const;
};
// Output: write the token 't' to the wide stream 'to'.
// Returns a stream reference (presumably 'to' itself, to allow chaining -- confirm
// against the implementation, which is not part of this header).
wostream &operator <<(wostream &to, const Token &t);
/**
 * Comment generated by the tokenizer.
 *
 * A comment is described by the half-open range [begin, end) inside a source
 * string, together with the id of the file it belongs to. Only a pointer to the
 * source string is stored, so that string has to outlive the Comment.
 */
class Comment {
public:
	// Create an empty comment that does not refer to any source string.
	Comment() : src(null), fileId(0), begin(0), end(0) {}

	// Create a comment covering 'begin' to 'end' of 'src', belonging to file 'fileId'.
	// Note: the address of 'src' is stored; no copy of the string is made.
	Comment(const String &src, nat fileId, nat begin, nat end)
		: src(&src), fileId(fileId), begin(begin), end(end) {}

	// Create a string from this comment. This strips all formatting that is applied
	// to the comment, such as leading asterisks.
	String str() const;

	// No comment at all? True when the range is empty (begin == end).
	bool empty() const { return begin == end; }

	// Any comment at all? Exact complement of empty(). The previous test
	// ('begin > end') was false for every well-formed range, so a present
	// comment was never reported.
	bool any() const { return !empty(); }

private:
	// Source string (not owned, null for a default-constructed comment).
	const String *src;

	// Source file.
	nat fileId;

	// Start and end position of the comment inside 'src'.
	nat begin;
	nat end;

	// State used when parsing the comment text.
	enum State {
		start,
		start2,
		done,

		// Single line comments.
		singleStart,
		singleInside,
		singleNewline,
		singleHalf,
		singleBefore,

		// Multi line comments.
		multiStart,
		multiInside,
		multiNewline,
		multiBefore,
	};

	// Parameters for the parsing.
	struct Params {
		// Number of spaces after the comment 'start'.
		nat space;

		// Current number of spaces.
		nat curr;

		// Number of empty lines encountered.
		nat empty;
	};

	// Helper for parsing comments: handles the character 'ch' given the current
	// 'state' and 'par' (both passed by reference, so they may be updated), with
	// 'to' as the output stream.
	// NOTE(review): the meaning of the returned pointer is not visible in this
	// header -- check the implementation before relying on it.
	static const wchar_t *parse(std::wostringstream &to, State &state, Params &par, wchar_t ch);
};
// Output: write the comment 'c' to the wide stream 'to'.
// Returns a stream reference (presumably 'to' itself, to allow chaining -- confirm
// against the implementation, which is not part of this header).
wostream &operator <<(wostream &to, const Comment &c);
/**
 * Tokenizer for the contents of .bnf files.
 *
 * Recognizes strings and basic operators (including (), [] and {}).
 * Comments of the form // ... \n are handled as well. The internal state
 * machine additionally has states named for multi-line comments and
 * preprocessor lines.
 *
 * Instances cannot be copied (inherits from NoCopy).
 */
class Tokenizer : private NoCopy {
public:
	// Tokenize the data in 'SrcPos::files[pathId]'.
	// NOTE(review): the earlier comment also mentioned a 'start' offset, but the
	// constructor takes no such argument.
	Tokenizer(nat pathId);

	// Fetch the next token in the stream. Throws an exception if the end of
	// the stream has been reached.
	Token next();

	// Discard the current token.
	void skip();

	// Look at the current token.
	Token peek();

	// Are there more tokens to get?
	bool more() const;

	// Fetch a token and check that it is 's'.
	void expect(const String &s);

	// Skip the current token if it is 's'.
	bool skipIf(const String &s);

	// The last comment relevant to the current token.
	Comment comment() const;

	// Forget the current comment.
	void clearComment();

private:
	// Different states of the tokenizer.
	enum State {
		sStart,
		sText,
		sOperator,
		sString,
		sComment,
		sMlComment,
		sPreproc,
		sPreprocExtend,
		sDone,
	};

	// Source string.
	const String src;

	// Source id.
	nat pathId;

	// Current position.
	nat pos;

	// Current SrcPos.
	SrcPos srcPos;

	// Start and end of the last comment.
	nat commentBegin;
	nat commentEnd;

	// The next found token.
	Token nextToken;

	// Find the next token.
	Token findNext();

	// Perform one step in the state machine.
	void processChar(nat &start, State &state, bool &firstComment);

	// Advance the SrcPos 'pos' from offset 'from' to offset 'to'.
	void advance(SrcPos &pos, nat from, nat to) const;
};
|