File: Parser.h

package info (click to toggle)
storm-lang 0.7.4-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 52,004 kB
  • sloc: ansic: 261,462; cpp: 140,405; sh: 14,891; perl: 9,846; python: 2,525; lisp: 2,504; asm: 860; makefile: 678; pascal: 70; java: 52; xml: 37; awk: 12
file content (304 lines) | stat: -rw-r--r-- 9,028 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
#pragma once
#include "Syntax.h"
#include "Table.h"
#include "Stack.h"
#include "BoolSet.h"
#include "Core/Array.h"
#include "Core/Map.h"
#include "Compiler/Syntax/ParserBackend.h"

namespace storm {
	namespace syntax {
		namespace glr {
			STORM_PKG(lang.bnf.glr);

			/**
			 * The GLR parser backend. This parser lazily generates LR-states and uses a GLR parser
			 * to interpret those states.
			 *
			 * TODO: Optimize the storage of stack tops by allocating them in bulk.
			 */
			class Parser : public ParserBackend {
				STORM_CLASS;
			public:
				// Create.
				STORM_CTOR Parser();

			protected:
				// Add syntax.
				virtual void add(Rule *rule);
				virtual void add(ProductionType *type);

				// Does this parser contain the same syntax as 'o'?
				virtual Bool sameSyntax(ParserBackend *o);

				// Parse a string. Returns 'true' if we found some match.
				virtual Bool parse(Rule *root, Str *str, Url *file, Str::Iter start, Str::Iter end);

				// Parse a string, performing error recovery.
				virtual InfoErrors parseApprox(Rule *root, Str *str, Url *file,
											Str::Iter start, Str::Iter end, MAYBE(InfoInternal *) ctx);

				// Clear all parse-related information. Included packages are retained.
				virtual void clear();

				/**
				 * Operations on the last parse.
				 */

				// Found any errors? If Str::Iter is not end, this is always true. Note that even if we
				// have an error, it could be possible to extract a tree!
				virtual Bool hasError() const;

				// Is it possible to extract a syntax tree? (equivalent to the return value of 'parse').
				virtual Bool hasTree() const;

				// Return an iterator after the last matched character, or the start of the string if no
				// match could be made.
				virtual Str::Iter matchEnd() const;

				// Get the error message.
				virtual Str *errorMsg() const;

				// Get the error position.
				virtual SrcPos errorPos() const;

				// Get the syntax tree. Only for C++, in Storm we know the exact subtype we will generate!
				virtual Node *tree() const;

				// Get the generic syntax tree.
				virtual InfoNode *infoTree() const;

				/**
				 * Performance inspection:
				 */

				// Get the number of states used.
				virtual Nat stateCount() const;

				// Get the number of bytes used.
				virtual Nat byteCount() const;

			private:
				/**
				 * Syntax related data, persistent between parses.
				 */

				// All syntax.
				Syntax *syntax;

				// The parse table.
				Table *table;

				// One and only empty string used for info trees.
				Str *emptyStr;

				/**
				 * Data cleared between parses.
				 */

				// Store tree nodes.
				TreeStore *store;

				// Stacks for future steps.
				FutureStacks *stacks;

				// Root rule for this parse.
				Nat parseRoot;

				// Start position of the last parse.
				Nat startPos;

				// End position of the last parse.
				Nat endPos;

				// Source string being parsed.
				Str *source;

				// Url of the source string.
				Url *sourceUrl;

				// Last found stack which accepted the string. Starts with a dummy stack item for
				// the topmost production.
				StackItem *acceptingStack;

				// The last non-empty state set. Used for error reporting.
				Array<StackItem *> *lastSet;

				// The position of 'lastSet'.
				Nat lastPos;

				/**
				 * Data used during parsing.
				 */

				// Current position in the input.
				Nat currentPos;

				// The current state being processed in the topmost stack.
				Nat topVisiting;

				/**
				 * Member functions.
				 */

				// Compute the start item-set for a rule.
				ItemSet startSet(Rule *rule);

				// Find the start state for for a rule.
				StackItem *startState(Nat pos, Rule *rule);

				// Add 'production' and all other productions which may complete this production to
				// 'alwaysReduce'.
				void findAlwaysReduce(Nat production);

				// Clear all data derived from the syntax.
				void clearSyntax();

				/**
				 * Set-up and tear down of parsing.
				 */

				// Initialize parsing of 'str'.
				void initParse(Rule *root, Str *str, Url *file, Str::Iter start, Str::Iter end);

				// Remove any temporary state used during parsing.
				void finishParse(MAYBE(InfoInternal *) context);

				// Parse from position 'i' to completion. Assumes 'stacks->top()' contains valid information.
				void doParse(Nat from);

				// Parse as 'doParse' does. Keep track of the last states before the last line
				// ending as well as the error information in 'doParse'. Used for better error recovery.
				void doParse(Nat from, Array<StackItem *> *&states, Nat &pos);

				/**
				 * Error recovery specials.
				 */

				// Advance all states on the current stack top.
				void advanceAll();
				void shiftAll();
				void reduceAll();

				// Perform all shifts possible at the current stack top. Returns 'true' if any
				// future states were generated.
				bool actorShift();

				// Perform all possible shifts in the current stack top. Returns 'true' if one or
				// more shifts were performed. Returns 'true' if a matching shift was found.
				bool shiftAll(StackItem *now);
				bool shiftAll(StackItem *now, Array<Action> *actions);
				void shiftAll(StackItem *now, Map<Nat, Nat> *actions);

				/**
				 * Parsing functions. Based on the paper "Parser Generation for Interactive
				 * Environments" by Jan Renkers.
				 */

				// Act on all states until we're done.
				void actor(Nat pos, Array<StackItem *> *states);

				// Data passed to the reduction actor.
				struct ActorEnv {
					// Current state.
					State *state;

					// Top of stack before reductions started.
					StackItem *stack;

					// Reduce all states, even states that have not reached completion. Used when
					// performing error correction. Contains information on which states have
					// already been reduced.
					bool reduceAll;
				};

				// Perform actions required for a state.
				bool actorShift(const ActorEnv &env);
				void actorReduce(const ActorEnv &env, StackItem *through);
				void doReduce(const ActorEnv &env, Nat production, StackItem *through);

				// Perform reductions on all states, even if a reduction action is not present.'
				void actorReduceAll(const ActorEnv &env, StackItem *through);

				// Static state to the 'reduce' function.
				struct ReduceEnv {
					// Env for recursive calls to reduce.
					const ActorEnv &old;

					// Production and rule being reduced.
					Nat production;
					Nat rule;

					// Number of items of the currently reduced production.
					Nat length;

					// Shift-errors to be added to the current production.
					Nat errors;
				};

				// Linked list of entries, keeping track of the path currently being reduced.
				struct Path {
					const Path *prev;
					StackItem *item;
				};

				// Reduce a production of length 'len' from the current stack item. If 'through' is
				// set, only nodes where the edge 'link' is passed are considered.
				void reduce(const ReduceEnv &env, StackItem *stack, const Path *path, StackItem *through, Nat len);
				void finishReduce(const ReduceEnv &env, StackItem *stack, const Path *path);

				// Limited reduction of a rule. Only paths passing through the edge 'link' are considered.
				void limitedReduce(const ReduceEnv &env, StackItem *through);

				// Produce error messages from the state set 'states'.
				void errorMsg(StrBuf *out, Nat pos, Array<StackItem *> *states) const;
				void errorMsg(Set<Str *> *errors, Nat state) const;

				// Produce error messages related to an unfullfilled requirement in the stack items provided.
				void reqErrorMsg(StrBuf *out, StackItem *states) const;
				Nat findMissingReq(Nat tree, ParentReq required) const;

				/**
				 * Tree computation.
				 */

				// Create the tree node starting at 'node'.
				Node *tree(const TreeNode &node, Nat endPos) const;

				// Fill in 'result' from the subtree generated by a pseudo-production.
				void subtree(Node *result, TreeNode &node, Nat endPos) const;

				// Fill in a token.
				void setToken(Node *result, TreeNode &node, Nat endPos, Token *token) const;

				// Create a syntax node for the production 'p'.
				Node *allocNode(Production *p, Nat start, Nat end) const;

				/**
				 * Info tree computation.
				 */

				// Create the info node starting at 'node'.
				InfoNode *infoTree(const TreeNode &node, Nat endPos) const;

				// Fill in 'result'. from the subtree generated by a pseudo-production. Returns # of errors.
				InfoErrors infoSubtree(InfoInternal *result, Nat &resultPos, TreeNode &node, Nat endPos) const;

				// Fill in a token. Returns # of errors.
				InfoErrors infoToken(InfoInternal *result, Nat &resultPos, TreeNode &node, Nat endPos, Token *token) const;

				// Compute the number of nodes for 'node' considering pseudo productions for repetitions.
				Nat totalLength(const TreeNode &node) const;

				/**
				 * Full info tree computation.
				 */

				// Construct completion nodes for the last, uncompleted states.
				void completePrefix();
			};

		}
	}
}