File: Syntax.h

package info (click to toggle)
storm-lang 0.7.4-1
  • links: PTS, VCS
  • area: main
  • in suites: forky
  • size: 52,004 kB
  • sloc: ansic: 261,462; cpp: 140,405; sh: 14,891; perl: 9,846; python: 2,525; lisp: 2,504; asm: 860; makefile: 678; pascal: 70; java: 52; xml: 37; awk: 12
file content (211 lines) | stat: -rw-r--r-- 6,390 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
#pragma once
#include "Core/Array.h"
#include "Core/Map.h"
#include "Core/Set.h"
#include "Core/EnginePtr.h"
#include "Compiler/Syntax/Rule.h"
#include "Compiler/Syntax/Production.h"
#include "ParentReq.h"

namespace storm {
	namespace syntax {
		namespace glr {
			STORM_PKG(lang.bnf.glr);

			class StackItem;
			class Syntax;
			class Item;

			/**
			 * Representation of all syntax in a parser.
			 *
			 * To properly handle the extensions supported by the bnf language, we introduce
			 * additional nonterminals to the grammar, and transform them as follows:
			 * X -> a (b)? c   => X -> a X' c
			 *                 => X'-> e
			 *                 => X'-> b
			 * X -> a (b)+ c   => X -> a b X' c
			 *                 => X'-> e
			 *                 => X'-> X' b
			 * X -> a (b)* c   => X -> a X' c
			 *                 => X'-> e
			 *                 => X'-> X' b
			 *
			 * Because of this, we number all rules and all productions and apply the following
			 * scheme for the added rules:
			 * - Rules with the highest bit set describe the added production if neccessary. This is
			 *   never stored in the list in the Syntax class. Note that the remaining part of the
			 *   number is an index to a *production*, not a rule. If the second highest bit is set,
			 *   this rule is considered to be a rule that has a single production that matches the
			 *   empty string. This is used to properly handle productions that may match nothing.
			 * - Productions stored in items with the highest bit set describe the epsilon production
			 *   of the added rule. If the two highest bits are set, that means the other added production.
			 */


			/**
			 * Information about a rule.
			 */
			class RuleInfo : public Object {
				STORM_CLASS;
			public:
				// Create.
				STORM_CTOR RuleInfo();

				// Count.
				inline Nat STORM_FN count() const { return productions ? productions->count() : 0; }

				// Access elements.
				inline Nat STORM_FN operator[](Nat id) const { return productions ? productions->at(id) : 0; }
				inline Nat at(Nat id) const { return productions ? productions->at(id) : 0; }

				// Add a production.
				void STORM_FN push(Nat id);

				// Add 'regex' to the follow-set of this rule.
				void STORM_FN follows(Regex regex);

				// Add the follow set of 'rule' to ours.
				void STORM_FN follows(Nat rule);

				// Add the first set of 'rule' to the follow set of us.
				void STORM_FN followsFirst(Nat rule);

				// Get the first set of this rule.
				Set<Regex> *STORM_FN first(Syntax *syntax);

				// Get the follow set of this rule.
				Set<Regex> *STORM_FN follows(Syntax *syntax);

			private:
				// All productions for this rule. May be null.
				Array<Nat> *productions;

				// The follow-set of this rule. May be null.
				Set<Regex> *followSet;

				// The follow-set of the rules shall be merged with the follow set in here.
				Set<Nat> *followRules;

				// The first-set of these rules shall be merged with the follow set in here.
				Set<Nat> *firstRules;

				// Computing first and follow sets.
				Bool compFirst;
				Bool compFollows;
			};


			/**
			 * All syntax in a parser.
			 *
			 * Assigns an identifier to each rule production to make things easier down the line.
			 *
			 * Each rule is also possibly assigned a "parent id". This is the rule's id when it is
			 * referred to as a requirement for a production. These are distinct from the regular
			 * IDs since we expect there to be relatively few of them in use simultaneously, meaning
			 * that we can use them as offsets into a bitmask when storing them.
			 *
			 * Also, computes the follow set for each non-terminal encountered.
			 */
			class Syntax : public ObjectOn<Compiler> {
				STORM_CLASS;
			public:
				// Create.
				STORM_CTOR Syntax();

				// Add syntax.
				Nat STORM_FN add(Rule *rule);
				Nat STORM_FN add(Production *p);

				// Find the ID of a rule, a production or a regex.
				Nat STORM_FN lookup(Rule *rule);
				Nat STORM_FN lookup(Production *p);

				// Get all productions for a rule.
				RuleInfo *STORM_FN ruleInfo(Nat rule);

				// Get the name of a rule.
				Str *STORM_FN ruleName(Nat rule) const;

				// Find a production from its id.
				Production *STORM_FN production(Nat id) const;

				// Get the parent id for a rule, expressed as a ParentReq structure.
				ParentReq STORM_FN parentId(Nat rule) const;

				// Get the parent requirement of a production.
				ParentReq STORM_FN productionReq(Nat id) const;

				// Same syntax as another object?
				Bool STORM_FN sameSyntax(Syntax *o) const;

			private:
				// All known rules and their ID:s.
				Map<Rule *, Nat> *rLookup;

				// All known productions and their ID:s.
				Map<Production *, Nat> *pLookup;

				// All known rules.
				Array<Rule *> *rules;

				// Productions for all rules.
				Array<RuleInfo *> *ruleProds;

				// Productions for repetition rules. Entries in here are created lazily.
				Array<RuleInfo *> *repRuleProds;

				// All productions. A production's id can be found in 'lookup'.
				Array<Production *> *productions;

				// Parent id:s for all productions.
				Map<Nat, ParentReq> *parentIds;

				// The parent id of a production. Stores 'noParent' if none.
				Array<ParentReq> *prodParents;

				// Add the follow-set of a production.
				void addFollows(Production *p);
				void addFollows(const Item &start, Production *p);

				// Add the thing 'Item' refers to to the rule info 'into'.
				void addFollows(RuleInfo *into, const Item &pos);

			public:
				// Various masks for rules and productions.
				enum {
					ruleMask    = 0xC0000000,
					ruleRepeat  = 0x80000000,
					ruleESkip   = 0x40000000,
					prodEpsilon = 0x80000000,
					prodESkip   = 0x40000000,
					prodRepeat  = 0xC0000000,
					prodMask    = 0xC0000000,
				};

				// Is this a special rule id? Returns ruleRepeat or ruleESkip.
				static inline Nat specialRule(Nat id) {
					return id & ruleMask;
				}

				// Get the production id this rule was derived from.
				static Nat baseRule(Nat id) {
					return id & ~Nat(ruleMask);
				}

				// Is this a special production id? Returns either prodEpsilon, prodESkip, prodMask or 0.
				static inline Nat specialProd(Nat id) {
					return id & prodMask;
				}

				// Get the production id.
				static inline Nat baseProd(Nat id) {
					return id & ~Nat(prodMask);
				}

			};

		}
	}
}