1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195
|
COMPILER Umbriel
(* COCO/R Grammar for Umbriel
Umbriel is a simple Modula-2 / Pascal like language
This grammar is LL(1), so a Coco/R generated parser will be correct *)
(* Character sets from which the tokens below are built *)
CHARACTERS
(* Line terminators. Both line feed and carriage return are included so that
   a string literal cannot run past the end of a line whether the source file
   uses CR, LF or CR LF line endings *)
eol = CHR(10) + CHR(13) .
letter = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" .
digit = "0123456789" .
(* Characters allowed inside a string delimited by single quotes *)
noQuote1 = ANY - "'" - eol .
(* Characters allowed inside a string delimited by double quotes *)
noQuote2 = ANY - '"' - eol .
(* The control characters CHR(9) to CHR(13) - tab, line feed, vertical tab,
form feed and carriage return - are skipped by the scanner *)
IGNORE
CHR(9) .. CHR(13)
(* Source comments use the Modula-2 style brackets and may be nested *)
COMMENTS FROM '(*' TO '*)' NESTED
(* The language is case sensitive *)
TOKENS
(* A letter followed by any number of letters or digits *)
identifier = letter { letter | digit } .
(* A character literal written as a number followed by the letter C *)
char = digit {digit} "C" .
(* The xxC variation uses decimal numbers, not the octal form of Wirth's other
languages *)
(* The second alternative applies when the digits are immediately followed by
two dots, as in the subrange 1..10. The CONTEXT part is looked at but is not
made part of the token, so the digits are scanned as an integer and not as the
start of a real *)
integer = digit { digit }
| digit { digit } CONTEXT ("..") .
(* A real must contain a decimal point; the digits after the point and the
signed exponent part are both optional *)
real = digit { digit } "." { digit }
[ ( "E" | "e" ) [ "+" | "-" ] digit { digit } ] .
(* A string is delimited by a pair of single quotes or a pair of double quotes;
it may contain neither its own delimiter nor an eol character *)
string = "'" { noQuote1 } "'" | '"' { noQuote2 } '"' .
PRODUCTIONS
(* Goal symbol: a complete program is a single named module *)
Umbriel
  = "MODULE" ModuleIdentifier ";"
    Block
    ModuleIdentifier "." .

(* The same identifier must be used in the heading and after the block *)
ModuleIdentifier
  = identifier .
(* A block is a possibly empty run of declarations, then an optional statement
   part, closed by END *)
Block
  = { Declaration }
    [ "BEGIN" StatementSequence ]
    "END" .

(* The kinds of declaration may be mixed in any order, but every identifier
   must still be declared before it is used *)
Declaration
  = ProcedureDeclaration ";"
  | "CONST" { ConstantDeclaration ";" }
  | "TYPE"  { TypeDeclaration ";" }
  | "VAR"   { VariableDeclaration ";" } .

ConstantDeclaration
  = ConstIdentifier "=" ConstExpression .

ConstIdentifier
  = identifier .

(* Syntactically any expression, but a ConstExpression may only contain
   constants, literals, and some standard function calls *)
ConstExpression
  = Expression .
TypeDeclaration
  = TypeIdentifier "=" Type .

Type
  = ArrayType
  | RecordType
  | TypeIdentifier .

TypeIdentifier
  = identifier .

ArrayType
  = "ARRAY" IndexType { "," IndexType }
    "OF" Type .

(* The index type is an anonymous subrange type and has to be written out in
   this form; implicit subranges like CHAR and BOOLEAN may not be used by
   quoting the type names *)
IndexType
  = "[" ConstExpression ".." ConstExpression "]" .

(* Because a field list may be empty, the syntax allows the "permissive" use
   of extra ; symbols as in Modula-2, and also completely empty records, as in
   Wirth's other languages *)
RecordType
  = "RECORD" FieldListSequence "END" .

FieldListSequence
  = FieldList { ";" FieldList } .

FieldList
  = [ IdentList ":" Type ] .

IdentList
  = VariableIdentifier { "," VariableIdentifier } .

VariableIdentifier
  = identifier .

(* Unlike Modula-2 and Pascal, a variable has to be declared in terms of a
   named type. This is to encourage simple abstraction *)
VariableDeclaration
  = IdentList ":" TypeIdentifier .
(* The procedure name is repeated after the closing END of its block *)
ProcedureDeclaration
  = "PROCEDURE" ProcedureIdentifier
    [ ParameterDeclarations ] ";"
    Block
    ProcedureIdentifier .

(* Procedures may omit the ( brackets ) if there are no parameters.
   Functions may have no parameters, but require the ( brackets ),
   as in Modula-2 *)
ParameterDeclarations
  = "(" [ FormalParameters ] ")"
    [ ":" ResultType ] .

ProcedureIdentifier
  = identifier .

(* Functions may only return INTEGER, REAL, CHAR, COLORS or BOOLEAN values *)
ResultType
  = ScalarTypeIdentifier .

ScalarTypeIdentifier
  = TypeIdentifier .

FormalParameters
  = FormalParameter { ";" FormalParameter } .

(* A specification introduced by VAR is a variable specification; one without
   it is a value specification *)
FormalParameter
  = VariableSpecification
  | ValueSpecification .

ValueSpecification
  = IdentList ":" TypeIdentifier .

VariableSpecification
  = "VAR" IdentList ":" TypeIdentifier .
StatementSequence
  = Statement { ";" Statement } .

(* The statements are as in Modula-2, other than WITH. The whole body is
   optional, so a statement may be empty *)
Statement
  = [ AssignmentOrCall
    | IfStatement
    | CaseStatement
    | WhileStatement
    | RepeatStatement
    | ForStatement
    | LoopStatement
    | ExitStatement
    | ReturnStatement
    ] .

(* An assignment and a procedure call both begin with an identifier, so they
   share one production and are distinguished by what follows the identifier *)
AssignmentOrCall
  = VarOrProcIdentifier
    ( { Selector } ":=" Expression
    | [ "(" [ ActualParameters ] ")" ]
    ) .

VarOrProcIdentifier
  = identifier .

(* Array indexing or record field selection *)
Selector
  = "[" IndexList "]"
  | "." VariableIdentifier .

IndexList
  = OrdinalExpression { "," OrdinalExpression } .

(* FormatSpecifier is only allowed in calls to the standard procedures
   Write and WriteLn *)
ActualParameters
  = Expression [ FormatSpecifier ]
    { "," Expression [ FormatSpecifier ] } .

FormatSpecifier
  = ":" IntegerExpression
    [ ":" IntegerExpression ] .
IfStatement
  = "IF" BooleanExpression "THEN" StatementSequence
    { "ELSIF" BooleanExpression "THEN" StatementSequence }
    [ "ELSE" StatementSequence ]
    "END" .

CaseStatement
  = "CASE" Expression "OF"
      Case { "|" Case }
    [ "ELSE" StatementSequence ]
    "END" .

(* A case may be empty, so the syntax allows the "permissive" use of extra |
   symbols as in Modula-2 *)
Case
  = [ CaseLabelList ":" StatementSequence ] .

CaseLabelList
  = CaseLabels { "," CaseLabels } .

(* A single constant value or a range of constant values *)
CaseLabels
  = ConstExpression [ ".." ConstExpression ] .

WhileStatement
  = "WHILE" BooleanExpression "DO"
      StatementSequence
    "END" .

RepeatStatement
  = "REPEAT"
      StatementSequence
    "UNTIL" BooleanExpression .

(* The BY step requires a constant expression so that the compiler can decide
   at compile time whether it is a "to" or "downto" loop. The control
   variable must be declared locally, and the implementation tries to detect
   situations where it would be threatened (changed in the body of the loop) *)
ForStatement
  = "FOR" VariableIdentifier ":=" OrdinalExpression
    "TO" OrdinalExpression
    [ "BY" ConstExpression ]
    "DO"
      StatementSequence
    "END" .

LoopStatement
  = "LOOP"
      StatementSequence
    "END" .

(* An ExitStatement is only allowed within the StatementSequence of a
   LoopStatement. It transfers control out of the closest enclosing
   LoopStatement *)
ExitStatement
  = "EXIT" .

(* A ReturnStatement within a function must incorporate an Expression; within
   a regular procedure or a program block it must not incorporate an
   Expression *)
ReturnStatement
  = "RETURN" [ Expression ] .
(* At most one relational operator may appear in an expression *)
Expression
  = SimpleExpression
    [ Relation SimpleExpression ] .

(* The next three names all stand for Expression; they record the kind of
   expression that is wanted at the point of use *)
BooleanExpression
  = Expression .

OrdinalExpression
  = Expression .

IntegerExpression
  = Expression .

(* An optional leading sign applies to the first term only *)
SimpleExpression
  = [ "-" | "+" ]
    Term { AddOperator Term } .

Term
  = Factor { MulOperator Factor } .

(* An identifier may be followed by selectors or by a bracketed actual
   parameter list, which may be empty *)
Factor
  = ConstantLiteral
  | VarOrFuncOrConstIdentifier
    ( { Selector }
    | "(" [ ActualParameters ] ")"
    )
  | "(" Expression ")"
  | NotOperator Factor .

ConstantLiteral
  = integer
  | real
  | char
  | string .

VarOrFuncOrConstIdentifier
  = identifier .

(* The two spellings are synonyms *)
NotOperator
  = "~" | "NOT" .

MulOperator
  = "*" | "/"
  | "DIV" | "MOD"
  | AndOperator .

(* The two spellings are synonyms *)
AndOperator
  = "&" | "AND" .

AddOperator
  = "+" | "-" | "OR" .

Relation
  = "="  | "#"  | "<>"
  | "<"  | "<="
  | ">"  | ">=" .
END Umbriel.
|