1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245
|
#ifndef LEXER_H
#define LEXER_H
#include "token.h"
#include <QList>
#include <QString>
#include <QSet>
/**
* @brief Lexer for SQLite grammar.
*
* Lexer (aka tokenizer) splits an SQL string into tokens.
* Tokens can then be used for syntax analysis, or for other purposes.
*
* It is useful if you have to modify some entities in the query,
* such as a string or an object name, but you don't want to deal with
* all escape characters in the name, or other special characters.
* Lexer packs such entities into separate tokens and gives them
* a type, so you know what the token represents.
*/
class API_EXPORT Lexer
{
    public:
        /**
         * @brief Creates lexer.
         */
        Lexer();

        /**
         * @brief Releases resources.
         */
        virtual ~Lexer();

        /**
         * @brief Tokenizes (splits into tokens) given SQL query.
         * @param sql SQL query to tokenize.
         * @return List of tokens produced from tokenizing query.
         */
        TokenList process(const QString& sql);

        /**
         * @brief Stores given SQL query internally for further processing by the lexer.
         * @param sql Query to remember.
         *
         * This method should be followed by calls to getToken().
         */
        void prepare(const QString& sql);

        /**
         * @brief Gets next token from query defined with prepare().
         * @return Token read from the query, or null token if no more tokens are available.
         *
         * Each call to this method generates a token for the next part of the query that was not tokenized yet.
         * Usual flow for this method looks like this:
         * @code
         * QString query = "...";
         * TokenPtr token;
         * lexer.prepare(query);
         * while (token = lexer.getToken())
         * {
         *     // do stuff with the token
         * }
         * @endcode
         */
        TokenPtr getToken();

        /**
         * @brief Clears query stored with prepare().
         */
        void cleanUp();

        /**
         * @brief Enables or disables tolerant mode.
         * @param enabled If true, then all multi-line and unfinished tokens (strings, comments) will be reported
         * with invalid=true in TolerantToken, but the token itself will have type like it was finished.
         */
        void setTolerantMode(bool enabled);

        /**
         * @brief Provides static sample tokens of all possible types.
         * @return All possible token types.
         *
         * This method uses static set of tokens, so there's no need
         * to delete them outside.
         *
         * It's used by Parser to try every token type as a possible candidate for a next valid token.
         * You should not need to use this method.
         */
        QSet<TokenPtr> getEveryTokenType();

        /**
         * @brief Gets static sample tokens of given types.
         * @param types Set of token types to get tokens for.
         * @return Sample tokens of the requested types.
         *
         * It's used by Parser to try every token type as a possible candidate for a next valid token.
         * You should not need to use this method.
         *
         * @overload
         */
        QSet<TokenPtr> getEveryTokenType(QSet<Token::Type> types);

        /**
         * @brief Tests whether lexer finished reading all tokens from the query.
         * @return true if there are no more tokens to be read, or false otherwise.
         *
         * This method simply checks whether there are any characters left in the query to be tokenized.
         * The query is the one defined with prepare(). Query shrinks with every call to getToken()
         * and once there are no more characters to consume by getToken(), this method will return true.
         *
         * If you call getToken() after isEnd() returned true, getToken() will return a null token
         * (see getToken()).
         */
        bool isEnd() const;

        /**
         * @brief Initializes internal set of static tokens.
         *
         * Initializes internal set of tokens used by getEveryTokenType().
         */
        static void staticInit();

        /**
         * @brief Restores string from token list.
         * @param tokens List of tokens.
         * @return String that was represented by tokens.
         *
         * It simply joins values of all tokens from the list using empty string separator (that is no separator at all).
         */
        static QString detokenize(const TokenList& tokens);

        /**
         * @brief Translates token to its proper string representation.
         * @param token Token to translate.
         * @return Translated string.
         *
         * This method applies wrappers where needed (for strings and ids).
         */
        static QString detokenize(const TokenPtr& token);

        /**
         * @brief Tokenizes given SQL query.
         * @param sql SQL query to tokenize.
         * @return List of tokens from tokenizing.
         *
         * This method is a shortcut for creating a temporary lexer and running process() on it
         * (NOTE(review): confirm against the implementation):
         * @code
         * Lexer lexer;
         * lexer.process(sql);
         * @endcode
         */
        static TokenList tokenize(const QString& sql);

        /**
         * @brief Translates token pointer into common token shared pointer.
         * @param token Token pointer to translate.
         * @return Shared pointer if found, or null pointer if not found.
         *
         * This method should be used against token pointers extracted from getEveryTokenType() results.
         * When a raw pointer is extracted from any TokenPtr (returned from getEveryTokenType()) using
         * QSharedPointer::data(), this method can be used to get back to the QSharedPointer.
         *
         * As Lexer keeps static internal list of tokens representing token types,
         * it can translate token pointer into shared pointer by comparing them.
         *
         * This method and getEveryTokenType() methods are used strictly by Parser and you should not
         * need to use them.
         */
        static TokenPtr getEveryTokenTypePtr(Token* token);

        /**
         * @brief Provides token representing semicolon in SQLite dialect.
         * @return Token representing semicolon.
         *
         * This is used by Parser to complete the parsed query in case the input query did not end with semicolon.
         */
        static TokenPtr getSemicolonToken();

    private:
        /**
         * @brief Creates token for every token type internal tables.
         * @param lemonType Lemon token ID for this token type.
         * @param type SQLiteStudio token type.
         * @param value Sample value for the token.
         * @return Created token.
         *
         * Every token type internal tables are populated using this method.
         *
         * @see getEveryTokenType()
         */
        static TokenPtr createTokenType(int lemonType, Token::Type type, const QString& value);

        /**
         * @brief Current "tolerant mode" flag.
         *
         * @see setTolerantMode()
         */
        bool tolerant = false;

        /**
         * @brief SQL query to be tokenized with getToken().
         *
         * It's defined with prepare().
         */
        QString sqlToTokenize;

        /**
         * @brief Token produced by the lexer previously.
         *
         * This is used only by the stateful lexer processing (i.e. with getToken())
         */
        TokenPtr prevTokenProcessed;

        /**
         * @brief Current tokenizer position in the sqlToTokenize.
         *
         * This position index is used to track which SQL characters should be tokenized
         * on next call to getToken().
         *
         * It's reset to 0 by prepare() and cleanUp(). It is also zero-initialized here, so that
         * a freshly constructed lexer never exposes an indeterminate position.
         */
        quint64 tokenPosition = 0;

        /**
         * @brief Static token representing semicolon for SQLite 3.
         *
         * @see getSemicolonToken()
         */
        static TokenPtr semicolonTokenSqlite3;

        /**
         * @brief Internal table of every token type for SQLite 3.
         *
         * Sets of sample tokens grouped by their Token::Type, representing all token types,
         * including diversification by values for keywords and operators.
         * It's used by the Parser to probe candidates for next valid token.
         */
        static QHash<Token::Type,QSet<TokenPtr> > everyTokenType3;

        /**
         * @brief Map of every token type pointer to its QSharedPointer from internal tables.
         *
         * This is used by getEveryTokenTypePtr().
         */
        static QHash<Token*,TokenPtr> everyTokenTypePtrMap;
};
#endif // LEXER_H
|