1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211
|
#pragma once
#include "Core/Array.h"
#include "Core/Map.h"
#include "Core/Set.h"
#include "Core/EnginePtr.h"
#include "Compiler/Syntax/Rule.h"
#include "Compiler/Syntax/Production.h"
#include "ParentReq.h"
namespace storm {
namespace syntax {
namespace glr {
STORM_PKG(lang.bnf.glr);
class StackItem;
class Syntax;
class Item;
/**
* Representation of all syntax in a parser.
*
* To properly handle the extensions supported by the bnf language, we introduce
* additional nonterminals to the grammar, and transform them as follows:
* X -> a (b)? c => X -> a X' c
* => X'-> e
* => X'-> b
* X -> a (b)+ c => X -> a b X' c
* => X'-> e
* => X'-> X' b
* X -> a (b)* c => X -> a X' c
* => X'-> e
* => X'-> X' b
*
* Because of this, we number all rules and all productions and apply the following
* scheme for the added rules:
* - Rules with the highest bit set describe the added production if neccessary. This is
* never stored in the list in the Syntax class. Note that the remaining part of the
* number is an index to a *production*, not a rule. If the second highest bit is set,
* this rule is considered to be a rule that has a single production that matches the
* empty string. This is used to properly handle productions that may match nothing.
* - Productions stored in items with the highest bit set describe the epsilon production
* of the added rule. If the two highest bits are set, that means the other added production.
*/
/**
* Information about a rule.
*/
class RuleInfo : public Object {
STORM_CLASS;
public:
// Create.
STORM_CTOR RuleInfo();
// Count.
inline Nat STORM_FN count() const { return productions ? productions->count() : 0; }
// Access elements.
inline Nat STORM_FN operator[](Nat id) const { return productions ? productions->at(id) : 0; }
inline Nat at(Nat id) const { return productions ? productions->at(id) : 0; }
// Add a production.
void STORM_FN push(Nat id);
// Add 'regex' to the follow-set of this rule.
void STORM_FN follows(Regex regex);
// Add the follow set of 'rule' to ours.
void STORM_FN follows(Nat rule);
// Add the first set of 'rule' to the follow set of us.
void STORM_FN followsFirst(Nat rule);
// Get the first set of this rule.
Set<Regex> *STORM_FN first(Syntax *syntax);
// Get the follow set of this rule.
Set<Regex> *STORM_FN follows(Syntax *syntax);
private:
// All productions for this rule. May be null.
Array<Nat> *productions;
// The follow-set of this rule. May be null.
Set<Regex> *followSet;
// The follow-set of the rules shall be merged with the follow set in here.
Set<Nat> *followRules;
// The first-set of these rules shall be merged with the follow set in here.
Set<Nat> *firstRules;
// Computing first and follow sets.
Bool compFirst;
Bool compFollows;
};
/**
* All syntax in a parser.
*
* Assigns an identifier to each rule production to make things easier down the line.
*
* Each rule is also possibly assigned a "parent id". This is the rule's id when it is
* referred to as a requirement for a production. These are distinct from the regular
* IDs since we expect there to be relatively few of them in use simultaneously, meaning
* that we can use them as offsets into a bitmask when storing them.
*
* Also, computes the follow set for each non-terminal encountered.
*/
class Syntax : public ObjectOn<Compiler> {
STORM_CLASS;
public:
// Create.
STORM_CTOR Syntax();
// Add syntax.
Nat STORM_FN add(Rule *rule);
Nat STORM_FN add(Production *p);
// Find the ID of a rule, a production or a regex.
Nat STORM_FN lookup(Rule *rule);
Nat STORM_FN lookup(Production *p);
// Get all productions for a rule.
RuleInfo *STORM_FN ruleInfo(Nat rule);
// Get the name of a rule.
Str *STORM_FN ruleName(Nat rule) const;
// Find a production from its id.
Production *STORM_FN production(Nat id) const;
// Get the parent id for a rule, expressed as a ParentReq structure.
ParentReq STORM_FN parentId(Nat rule) const;
// Get the parent requirement of a production.
ParentReq STORM_FN productionReq(Nat id) const;
// Same syntax as another object?
Bool STORM_FN sameSyntax(Syntax *o) const;
private:
// All known rules and their ID:s.
Map<Rule *, Nat> *rLookup;
// All known productions and their ID:s.
Map<Production *, Nat> *pLookup;
// All known rules.
Array<Rule *> *rules;
// Productions for all rules.
Array<RuleInfo *> *ruleProds;
// Productions for repetition rules. Entries in here are created lazily.
Array<RuleInfo *> *repRuleProds;
// All productions. A production's id can be found in 'lookup'.
Array<Production *> *productions;
// Parent id:s for all productions.
Map<Nat, ParentReq> *parentIds;
// The parent id of a production. Stores 'noParent' if none.
Array<ParentReq> *prodParents;
// Add the follow-set of a production.
void addFollows(Production *p);
void addFollows(const Item &start, Production *p);
// Add the thing 'Item' refers to to the rule info 'into'.
void addFollows(RuleInfo *into, const Item &pos);
public:
// Various masks for rules and productions.
enum {
ruleMask = 0xC0000000,
ruleRepeat = 0x80000000,
ruleESkip = 0x40000000,
prodEpsilon = 0x80000000,
prodESkip = 0x40000000,
prodRepeat = 0xC0000000,
prodMask = 0xC0000000,
};
// Is this a special rule id? Returns ruleRepeat or ruleESkip.
static inline Nat specialRule(Nat id) {
return id & ruleMask;
}
// Get the production id this rule was derived from.
static Nat baseRule(Nat id) {
return id & ~Nat(ruleMask);
}
// Is this a special production id? Returns either prodEpsilon, prodESkip, prodMask or 0.
static inline Nat specialProd(Nat id) {
return id & prodMask;
}
// Get the production id.
static inline Nat baseProd(Nat id) {
return id & ~Nat(prodMask);
}
};
}
}
}
|