1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168
|
#pragma once
#include "Compiler/NamedThread.h"
#include "Compiler/Syntax/ParserBackend.h"
#include "Compiler/Syntax/Node.h"
#include "Compiler/Syntax/InfoNode.h"
#include "Compiler/Syntax/Rule.h"
#include "Compiler/Syntax/Production.h"
#include "Compiler/Syntax/Earley/State.h"
#include "Core/Array.h"
#include "Core/Map.h"
#include "OS/FnCall.h"
#include "Compiler/Exception.h"
namespace storm {
namespace syntax {
namespace earley {
STORM_PKG(lang.bnf.earley);
/**
* The parser here is the main parser in Storm and is based on the parser described by Jay
* Earley in 'An Efficient Context-Free Parsing Algorithm'.
*
* TODO: Make it possible to parse things on other threads than the compiler thread.
*/
class Parser : public ParserBackend {
STORM_CLASS;
public:
// Create.
STORM_CTOR Parser();
protected:
// Add a package containing syntax definitions (not recursive).
virtual void add(Rule *rule);
virtual void add(ProductionType *production);
// Does this parser contain the same syntax as 'o'?
virtual Bool sameSyntax(ParserBackend *o);
// Parse a string. Returns 'true' if we found some match.
virtual Bool parse(Rule *root, Str *str, Url *file, Str::Iter start, Str::Iter end);
// Clear all parse-related information. Included packages are retained.
virtual void clear();
/**
* Operations on the last parse.
*/
// Found any errors? If Str::Iter is not end, this is always true. Note that even if we
// have an error, it could be possible to extract a tree!
virtual Bool hasError() const;
// Is it possible to extract a syntax tree? (equivalent to the return value of 'parse').
virtual Bool hasTree() const;
// Return an iterator after the last matched character, or the start of the string if no
// match could be made.
virtual Str::Iter matchEnd() const;
// Get the error message.
virtual Str *errorMsg() const;
// Get the error position.
virtual SrcPos errorPos() const;
// Get the syntax tree.
virtual Node *tree() const;
// Get the generic syntax tree.
virtual InfoNode *infoTree() const;
/**
* Performance inspection:
*/
// Get the number of states used.
virtual Nat stateCount() const;
// Get the number of bytes used.
virtual Nat byteCount() const;
private:
// Information about a rule.
class RuleInfo {
STORM_VALUE;
public:
STORM_CTOR RuleInfo();
// All productions for this rule. May be null.
Array<Production *> *productions;
// Can this rule match the empty string? 0 = no, 1 = yes, >=2 don't know yet.
Byte matchesNull;
// Add a production. Handles the case where 'productions' is null.
void push(Production *p);
};
// All rules we know of so far, and their options.
Map<Rule *, RuleInfo> *rules;
// Parsed source string.
Str *src;
// Current file and start position.
MAYBE(Url *) srcFile;
Nat srcOffset;
// Root production.
Production *rootProd;
// Steps. Each step corresponds to a character in the input string (including an
// implicit end of string character).
GcArray<StateSet *> *steps;
// Last state containing a finish step (initialized to something >= states.count).
nat lastFinish;
// Process a single step. Returns true if we found an accepting state here.
bool process(nat step);
// Predictor, completer and scanner as described in the paper. The StateSet should be
// the current step, ie. the set in which 'state' belongs.
void predictor(StatePtr ptr, const State &state);
void completer(StatePtr ptr, const State &state);
void scanner(StatePtr ptr, const State &state);
// Does 'rule' match an empty string?
bool matchesEmpty(Rule *r);
bool matchesEmpty(RuleInfo &info);
bool matchesEmpty(Production *p);
bool matchesEmpty(Token *t);
// Find the last step which is not empty.
nat lastStep() const;
// Find the finishing state (the last one if there are more).
const State *finish() const;
// Find all rules and productions in progress for a given state.
Map<Str *, StrBuf *> *inProgress(const StateSet &step) const;
// Create a tree for the production ending in 'end'.
Node *tree(StatePtr end) const;
// Allocate a tree node.
Node *allocNode(const State &from, Nat endStep) const;
// Reverse all arrays in a node.
void reverseNode(Node *node) const;
// Empty string used in the info tree.
Str *emptyString;
// Create a tree for the production ending in 'end'.
InfoNode *infoTree(StatePtr end) const;
// Get a State from a StatePtr.
const State &state(const StatePtr &p) const;
// State set needs to access 'state()'
friend class StateSet;
};
}
}
}
|