File: Parser.h

package info (click to toggle)
storm-lang 0.7.5-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 52,028 kB
  • sloc: ansic: 261,471; cpp: 140,432; sh: 14,891; perl: 9,846; python: 2,525; lisp: 2,504; asm: 860; makefile: 678; pascal: 70; java: 52; xml: 37; awk: 12
file content (168 lines) | stat: -rw-r--r-- 4,868 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
#pragma once
#include "Compiler/NamedThread.h"
#include "Compiler/Syntax/ParserBackend.h"
#include "Compiler/Syntax/Node.h"
#include "Compiler/Syntax/InfoNode.h"
#include "Compiler/Syntax/Rule.h"
#include "Compiler/Syntax/Production.h"
#include "Compiler/Syntax/Earley/State.h"
#include "Core/Array.h"
#include "Core/Map.h"
#include "OS/FnCall.h"
#include "Compiler/Exception.h"

namespace storm {
	namespace syntax {
		namespace earley {
			STORM_PKG(lang.bnf.earley);

			/**
			 * The parser here is the main parser in Storm and is based on the parser described by Jay
			 * Earley in 'An Efficient Context-Free Parsing Algorithm'.
			 *
			 * TODO: Make it possible to parse things on other threads than the compiler thread.
			 */
			class Parser : public ParserBackend {
				STORM_CLASS;
			public:
				// Create.
				STORM_CTOR Parser();

			protected:
				// Add a package containing syntax definitions (not recursive).
				virtual void add(Rule *rule);
				virtual void add(ProductionType *production);

				// Does this parser contain the same syntax as 'o'?
				virtual Bool sameSyntax(ParserBackend *o);

				// Parse a string. Returns 'true' if we found some match.
				virtual Bool parse(Rule *root, Str *str, Url *file, Str::Iter start, Str::Iter end);

				// Clear all parse-related information. Included packages are retained.
				virtual void clear();

				/**
				 * Operations on the last parse.
				 */

				// Found any errors? If Str::Iter is not end, this is always true. Note that even if we
				// have an error, it could be possible to extract a tree!
				virtual Bool hasError() const;

				// Is it possible to extract a syntax tree? (equivalent to the return value of 'parse').
				virtual Bool hasTree() const;

				// Return an iterator after the last matched character, or the start of the string if no
				// match could be made.
				virtual Str::Iter matchEnd() const;

				// Get the error message.
				virtual Str *errorMsg() const;

				// Get the error position.
				virtual SrcPos errorPos() const;

				// Get the syntax tree.
				virtual Node *tree() const;

				// Get the generic syntax tree.
				virtual InfoNode *infoTree() const;


				/**
				 * Performance inspection:
				 */

				// Get the number of states used.
				virtual Nat stateCount() const;

				// Get the number of bytes used.
				virtual Nat byteCount() const;

			private:
				// Information about a rule.
				class RuleInfo {
					STORM_VALUE;
				public:
					STORM_CTOR RuleInfo();

					// All productions for this rule. May be null.
					Array<Production *> *productions;

					// Can this rule match the empty string? 0 = no, 1 = yes, >=2 don't know yet.
					Byte matchesNull;

					// Add a production. Handles the case where 'productions' is null.
					void push(Production *p);
				};

				// All rules we know of so far, and their options.
				Map<Rule *, RuleInfo> *rules;

				// Parsed source string.
				Str *src;

				// Current file and start position.
				MAYBE(Url *) srcFile;
				Nat srcOffset;

				// Root production.
				Production *rootProd;

				// Steps. Each step corresponds to a character in the input string (including an
				// implicit end of string character).
				GcArray<StateSet *> *steps;

				// Last state containing a finish step (initialized to something >= states.count).
				nat lastFinish;

				// Process a single step. Returns true if we found an accepting state here.
				bool process(nat step);

				// Predictor, completer and scanner as described in the paper. The StateSet should be
				// the current step, ie. the set in which 'state' belongs.
				void predictor(StatePtr ptr, const State &state);
				void completer(StatePtr ptr, const State &state);
				void scanner(StatePtr ptr, const State &state);

				// Does 'rule' match an empty string?
				bool matchesEmpty(Rule *r);
				bool matchesEmpty(RuleInfo &info);
				bool matchesEmpty(Production *p);
				bool matchesEmpty(Token *t);

				// Find the last step which is not empty.
				nat lastStep() const;

				// Find the finishing state (the last one if there are more).
				const State *finish() const;

				// Find all rules and productions in progress for a given state.
				Map<Str *, StrBuf *> *inProgress(const StateSet &step) const;

				// Create a tree for the production ending in 'end'.
				Node *tree(StatePtr end) const;

				// Allocate a tree node.
				Node *allocNode(const State &from, Nat endStep) const;

				// Reverse all arrays in a node.
				void reverseNode(Node *node) const;

				// Empty string used in the info tree.
				Str *emptyString;

				// Create a tree for the production ending in 'end'.
				InfoNode *infoTree(StatePtr end) const;

				// Get a State from a StatePtr.
				const State &state(const StatePtr &p) const;

				// State set needs to access 'state()'
				friend class StateSet;
			};

		}
	}
}