#ifndef PARSER_H
#define PARSER_H
#include "token.h"
#undef NDEBUG
#include "ast/sqlitequery.h"
#include "ast/sqliteexpr.h"
class Lexer;
class ParserContext;
class ParserError;
/**
* @brief SQL parser.
*
* The Parser analyzes given query and produces an Abstract Syntax Tree (AST).
* The AST is a tree of objects describing parsed query.
*
* Typical use case would be:
* @code
* Parser parser;
* if (parser.parse(queryString))
* {
* QList<SqliteQueryPtr> queries = parser.getQueries();
* qDebug() << "number of queries parsed:" << queries.size();
* for (SqliteQueryPtr query : queries)
* {
* // do stuff with parsed queries
* // ...
* if (query.dynamicCast<SqliteSelect>())
* {
* qDebug() << "it's a select!";
* }
* }
* }
* else
* {
* qDebug() << "Error while parsing:" << parser.getErrorString();
* }
* @endcode
*
* There's also a convenient parse<T>() method with template argument.
*
* There is also getNextTokenCandidates(), which reports all token types that are valid
* (according to the syntax rules) directly after the given query string.
*
* Finally, there is a parseExpr() to parse just a SQLite expression
* (http://sqlite.org/lang_expr.html).
*
* The parser is based on the SQLite grammar defined in the sqlite3.y file.
*
* This is a high-level API to the Lemon Parser, the original SQLite parser.
*/
class API_EXPORT Parser
{
public:
/**
* @brief Creates parser for SQLite dialect.
*/
Parser();
/**
* @brief Releases internal resources.
*/
virtual ~Parser();
/**
* @brief Enables or disables low-level debug messages for this parser.
* @param enabled true to enable, false to disable debug messages.
*
* Enabling this causes detailed debug messages from the Lemon parser
* to be printed. It is useful if you cannot understand why the parser
* thinks that the query is incorrect, etc.
*/
void setLemonDebug(bool enabled);
/**
* @brief Parses given query string.
* @param sql SQL query string to parse. Can be multiple queries separated with semicolon.
* @param ignoreMinorErrors If true, then parser will ignore minor errors. Detailed descritpion below.
* @return true if the query was successfully parsed, or false if not.
*
* When the parser encounters syntax error, it stops and returns false. The AST objects (parsed queries)
* are partially filled with data - as much as it was possible till the error. Errors can be examined
* with getErrors() or getErrorString().
*
* The \p ignoreMinorErrors allows to ignore minor syntax errors. The minor error is the error
* when for example there's a SELECT query, but no result column was typed yet. Normally this is incorrect
* query, cause SELECT statement requires at least 1 result column, but we can tell parser to ignore it.
*
* The usual case for minor error is when there's a SQLite expression missing at position, where it's expected,
* or when the expression is incomplete, like <tt>database.table.</tt> (no column name as the last part).
*/
bool parse(const QString& sql, bool ignoreMinorErrors = false);
/**
* @brief Parses SQLite expression.
* @param sql SQLite expression.
* @return Parsed object, or null on failure. Parser doesn't own parsed object, you have to take care of deleting it.
*
* SQLite expression is any expression that you could type after <tt>"SELECT * FROM WHERE"</tt>, etc.
* It's syntax is described at: http://sqlite.org/lang_expr.html
*/
SqliteExpr* parseExpr(const QString& sql);
/**
* @brief Parses given query and returns it AST specialized object.
* @tparam T Type of AST object to parse into.
* @param query SQL query string to parse.
* @return Shared pointer to the parsed AST object, or null pointer if the query could not be parsed,
* or the parsed object was not of the requested type.
*
* This is a convenient method to parse string query, pick first parsed query from getQueries()
* and case it into desired AST object type. If this process fails at any point, the result returned will be
* a null pointer.
*
* Example:
* @code
* Parser parser;
* SqliteSelectPtr select = parser.parse<SelectPtr>(queryString);
* if (!select)
* {
* qCritical() << "Could not parse" << queryString << "to a SELECT statement, details:" << parser.getErrorString();
* return;
* }
* // do stuff with the 'select' object
* // ...
* @endcode
*/
template <class T>
QSharedPointer<T> parse(const QString& query)
{
if (!parse(query) || getQueries().size() == 0)
return QSharedPointer<T>();
return getQueries().first().dynamicCast<T>();
}
/**
* @brief Tests what are possible valid candidates for the next token.
* @param sql Part of the SQL query to check for the next token.
* @return List of token candidates.
*
* This method gets list of all token types from Lexer::getEveryTokenType() and tests which of them does the parser
* accept for the next token after the given query.
*
* You should treat the results of this method as a list of token <b>types</b>, rather than explicit tokens.
* Each token in the results represents a logical grammar entity. You should look at the Token::type and Token::value,
* while the Token::value is meaningful only for Token::KEYWORD, or Token::OPERATOR. For other token types, the value
* is just an example value (like for Token::INTEGER all numbers are valid candidates, not just one returned
* from this method).
*/
TokenList getNextTokenCandidates(const QString& sql);
/**
* @brief Provides list of queries parsed recently by the parser.
* @return List of queries.
*
* On successful execution this list should contain at least 1 query, unless parsed query
* was a blank string - in that case this method will return list with no elements.
*
* In case of parsing error it's undefined how many elements will be in the list
* and also how much of the information will be filled in the queries - it depends on where the error appeared.
*/
const QList<SqliteQueryPtr>& getQueries();
/**
* @brief Provides list of errors that occurred during parsing.
* @return List of errors.
*
* Usually there's just one error, but there are cases when there might be more error on the list.
* That would be for example if you type "!" somewhere in the query where it should not be.
* Parser can deal with such errors and proceed. Such errors are later reported as failed parsing after all,
* but parser can continue and provide more data for AST objects (even they will be result of failed parsing process)
* and find other errors. In such cases, there can be 2, or even more errors on the list.
*/
const QList<ParserError*>& getErrors();
/**
* @brief Provides error message from recent failed parsing process.
* @return Error message.
*
* This is convenient method to get first error getom getErrors() and return message from it.
*/
QString getErrorString();
/**
* @brief Provides list of tokens procudes during parsing process.
* @return List of tokens.
*
* Parser tokenizes query in order to parse it. It stores those tokens, so you can use them and you don't
* need to put query through the Lexer again (after Parser did it).
*/
TokenList getParsedTokens();
/**
* @brief Tells whether most recent parsing was successful.
* @return true if parsing was successful, or false otherwise.
*
* This method tells result for: parse(), parse<T>(), getNextTokenCandidates() and parseExpr().
*/
bool isSuccessful() const;
/**
* @brief Clears parser state.
*
* Clears any parsed queries, stored tokens, errors, etc.
*/
void reset();
private:
/**
* @brief Does the actual parsing job.
* @param sql Query to be parsed.
* @param lookForExpectedToken true if the parsing should be in "look for valid token candidates" mode,
* or false for regular mode.
* @return true on success, or false on failure.
*
* Both parse() and getNextTokenCandidates() call this method.
*/
bool parseInternal(const QString &sql, bool lookForExpectedToken);
/**
* @brief Probes token types against the current parser state.
* @param pParser Pointer to Lemon parser.
*
* Probes all token types against current state of the parser. After each probe, the result is stored
* and the parser state is restored to as what it was before the probe.
*
* After all tokens were probed, we have the full information on what tokens are welcome
* at this parser state. This information is stored in the acceptedTokens member.
*/
void expectedTokenLookup(void *pParser);
/**
* @brief Initializes Parser's internals.
*
* Creates internal Lexer and ParserContext.
*/
void init();
/**
* @brief Cleans up Parser's resources.
*
* Deletes internal Lexer and ParserContext.
*/
void cleanUp();
/**
* @brief Creates Lemon parser.
* @return Pointer to Lemon parser.
*/
void* parseAlloc(void *(*mallocProc)(size_t));
/**
* @brief Releases memory of the Lemon parser.
* @param p Pointer to Lemon parser.
*/
void parseFree(void *p, void (*freeProc)(void*));
/**
* @brief Invokes next step of Lemon parsing process.
* @param yyp Pointer to the Lemon parser.
* @param yymajor Lemon token ID (Token::lemonType) of the next token to be parsed.
* @param yyminor Next Token object to be parsed.
* @param parserContext Common context object for the parsing process.
*
* This method feeds Lemon parser with next token. This is the major input method
* for parsing the query. It's a bridge between the high-level Parser API
* and the low-level Lemon parser.
*/
void parse(void *yyp, int yymajor, TokenPtr yyminor, ParserContext* parserContext);
/**
* @brief Enables low-level parser debug messages.
* @param stream Stream to write messages to.
* @param zPrefix Prefix for all messages.
*/
void parseTrace(FILE *stream, char *zPrefix);
/**
* @brief Copies Lemon parser state.
* @param other Input parser state.
* @return Copied parser state.
*/
void* parseCopyParserState(void* other);
/**
* @brief Restores Lemon parser state from saved copy.
* @param saved Saved copy of Lemon parser state.
* @param target Parser state to restore from saved copy.
*/
void parseRestoreParserState(void* saved, void* target);
/**
* @brief Releases memory used for the Lemon parser state copy.
* @param other Lemon parser state to be freed.
*/
void parseFreeSavedState(void* other);
/**
* @brief Adds meaningless token into Lemon's parser stack.
* @param other Lemon parser.
* @param token Token to be added.
*
* This method is used to add spaces and comments to the Lemon's stack.
*/
void parseAddToken(void* other, TokenPtr token);
/**
* @brief Flag indicating if the Lemon low-level debug messages are enabled.
*/
bool debugLemon = false;
/**
* @brief Parser's internal Lexer.
*/
Lexer* lexer = nullptr;
/**
* @brief Parser's internal context shared for the all Lemon parsing steps.
*
* Context is used as an output from Lemon parser. Lemon parser stores error details, token maps,
* and others in it.
*
* On the other side, Parser class puts configuration into the Context, so Lemon
* can use it.
*/
ParserContext* context = nullptr;
/**
* @brief List of valid tokens collected by expectedTokenLookup().
*/
TokenList acceptedTokens;
};
#endif // PARSER_H