1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304
|
#pragma once
#include "Syntax.h"
#include "Table.h"
#include "Stack.h"
#include "BoolSet.h"
#include "Core/Array.h"
#include "Core/Map.h"
#include "Compiler/Syntax/ParserBackend.h"
namespace storm {
namespace syntax {
namespace glr {
STORM_PKG(lang.bnf.glr);
/**
* The GLR parser backend. This parser lazily generates LR-states and uses a GLR parser
* to interpret those states.
*
* TODO: Optimize the storage of stack tops by allocating them in bulk.
*/
class Parser : public ParserBackend {
STORM_CLASS;
public:
// Create.
STORM_CTOR Parser();
protected:
// Add syntax.
virtual void add(Rule *rule);
virtual void add(ProductionType *type);
// Does this parser contain the same syntax as 'o'?
virtual Bool sameSyntax(ParserBackend *o);
// Parse a string. Returns 'true' if we found some match.
virtual Bool parse(Rule *root, Str *str, Url *file, Str::Iter start, Str::Iter end);
// Parse a string, performing error recovery.
virtual InfoErrors parseApprox(Rule *root, Str *str, Url *file,
Str::Iter start, Str::Iter end, MAYBE(InfoInternal *) ctx);
// Clear all parse-related information. Included packages are retained.
virtual void clear();
/**
* Operations on the last parse.
*/
// Found any errors? If Str::Iter is not end, this is always true. Note that even if we
// have an error, it could be possible to extract a tree!
virtual Bool hasError() const;
// Is it possible to extract a syntax tree? (equivalent to the return value of 'parse').
virtual Bool hasTree() const;
// Return an iterator after the last matched character, or the start of the string if no
// match could be made.
virtual Str::Iter matchEnd() const;
// Get the error message.
virtual Str *errorMsg() const;
// Get the error position.
virtual SrcPos errorPos() const;
// Get the syntax tree. Only for C++, in Storm we know the exact subtype we will generate!
virtual Node *tree() const;
// Get the generic syntax tree.
virtual InfoNode *infoTree() const;
/**
* Performance inspection:
*/
// Get the number of states used.
virtual Nat stateCount() const;
// Get the number of bytes used.
virtual Nat byteCount() const;
private:
/**
* Syntax related data, persistent between parses.
*/
// All syntax.
Syntax *syntax;
// The parse table.
Table *table;
// One and only empty string used for info trees.
Str *emptyStr;
/**
* Data cleared between parses.
*/
// Store tree nodes.
TreeStore *store;
// Stacks for future steps.
FutureStacks *stacks;
// Root rule for this parse.
Nat parseRoot;
// Start position of the last parse.
Nat startPos;
// End position of the last parse.
Nat endPos;
// Source string being parsed.
Str *source;
// Url of the source string.
Url *sourceUrl;
// Last found stack which accepted the string. Starts with a dummy stack item for
// the topmost production.
StackItem *acceptingStack;
// The last non-empty state set. Used for error reporting.
Array<StackItem *> *lastSet;
// The position of 'lastSet'.
Nat lastPos;
/**
* Data used during parsing.
*/
// Current position in the input.
Nat currentPos;
// The current state being processed in the topmost stack.
Nat topVisiting;
/**
* Member functions.
*/
// Compute the start item-set for a rule.
ItemSet startSet(Rule *rule);
// Find the start state for for a rule.
StackItem *startState(Nat pos, Rule *rule);
// Add 'production' and all other productions which may complete this production to
// 'alwaysReduce'.
void findAlwaysReduce(Nat production);
// Clear all data derived from the syntax.
void clearSyntax();
/**
* Set-up and tear down of parsing.
*/
// Initialize parsing of 'str'.
void initParse(Rule *root, Str *str, Url *file, Str::Iter start, Str::Iter end);
// Remove any temporary state used during parsing.
void finishParse(MAYBE(InfoInternal *) context);
// Parse from position 'i' to completion. Assumes 'stacks->top()' contains valid information.
void doParse(Nat from);
// Parse as 'doParse' does. Keep track of the last states before the last line
// ending as well as the error information in 'doParse'. Used for better error recovery.
void doParse(Nat from, Array<StackItem *> *&states, Nat &pos);
/**
* Error recovery specials.
*/
// Advance all states on the current stack top.
void advanceAll();
void shiftAll();
void reduceAll();
// Perform all shifts possible at the current stack top. Returns 'true' if any
// future states were generated.
bool actorShift();
// Perform all possible shifts in the current stack top. Returns 'true' if one or
// more shifts were performed. Returns 'true' if a matching shift was found.
bool shiftAll(StackItem *now);
bool shiftAll(StackItem *now, Array<Action> *actions);
void shiftAll(StackItem *now, Map<Nat, Nat> *actions);
/**
* Parsing functions. Based on the paper "Parser Generation for Interactive
* Environments" by Jan Renkers.
*/
// Act on all states until we're done.
void actor(Nat pos, Array<StackItem *> *states);
// Data passed to the reduction actor.
struct ActorEnv {
// Current state.
State *state;
// Top of stack before reductions started.
StackItem *stack;
// Reduce all states, even states that have not reached completion. Used when
// performing error correction. Contains information on which states have
// already been reduced.
bool reduceAll;
};
// Perform actions required for a state.
bool actorShift(const ActorEnv &env);
void actorReduce(const ActorEnv &env, StackItem *through);
void doReduce(const ActorEnv &env, Nat production, StackItem *through);
// Perform reductions on all states, even if a reduction action is not present.'
void actorReduceAll(const ActorEnv &env, StackItem *through);
// Static state to the 'reduce' function.
struct ReduceEnv {
// Env for recursive calls to reduce.
const ActorEnv &old;
// Production and rule being reduced.
Nat production;
Nat rule;
// Number of items of the currently reduced production.
Nat length;
// Shift-errors to be added to the current production.
Nat errors;
};
// Linked list of entries, keeping track of the path currently being reduced.
struct Path {
const Path *prev;
StackItem *item;
};
// Reduce a production of length 'len' from the current stack item. If 'through' is
// set, only nodes where the edge 'link' is passed are considered.
void reduce(const ReduceEnv &env, StackItem *stack, const Path *path, StackItem *through, Nat len);
void finishReduce(const ReduceEnv &env, StackItem *stack, const Path *path);
// Limited reduction of a rule. Only paths passing through the edge 'link' are considered.
void limitedReduce(const ReduceEnv &env, StackItem *through);
// Produce error messages from the state set 'states'.
void errorMsg(StrBuf *out, Nat pos, Array<StackItem *> *states) const;
void errorMsg(Set<Str *> *errors, Nat state) const;
// Produce error messages related to an unfullfilled requirement in the stack items provided.
void reqErrorMsg(StrBuf *out, StackItem *states) const;
Nat findMissingReq(Nat tree, ParentReq required) const;
/**
* Tree computation.
*/
// Create the tree node starting at 'node'.
Node *tree(const TreeNode &node, Nat endPos) const;
// Fill in 'result' from the subtree generated by a pseudo-production.
void subtree(Node *result, TreeNode &node, Nat endPos) const;
// Fill in a token.
void setToken(Node *result, TreeNode &node, Nat endPos, Token *token) const;
// Create a syntax node for the production 'p'.
Node *allocNode(Production *p, Nat start, Nat end) const;
/**
* Info tree computation.
*/
// Create the info node starting at 'node'.
InfoNode *infoTree(const TreeNode &node, Nat endPos) const;
// Fill in 'result'. from the subtree generated by a pseudo-production. Returns # of errors.
InfoErrors infoSubtree(InfoInternal *result, Nat &resultPos, TreeNode &node, Nat endPos) const;
// Fill in a token. Returns # of errors.
InfoErrors infoToken(InfoInternal *result, Nat &resultPos, TreeNode &node, Nat endPos, Token *token) const;
// Compute the number of nodes for 'node' considering pseudo productions for repetitions.
Nat totalLength(const TreeNode &node) const;
/**
* Full info tree computation.
*/
// Construct completion nodes for the last, uncompleted states.
void completePrefix();
};
}
}
}
|