File: grammar.mli

package info (click to toggle)
menhir 20060615.dfsg-2
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 1,024 kB
  • ctags: 1,474
  • sloc: ml: 10,279; makefile: 124; sh: 38
file content (384 lines) | stat: -rw-r--r-- 12,574 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
(**************************************************************************)
(*                                                                        *)
(*  Menhir                                                                *)
(*                                                                        *)
(*  Franois Pottier and Yann Rgis-Gianas, INRIA Rocquencourt            *)
(*                                                                        *)
(*  Copyright 2005 Institut National de Recherche en Informatique et      *)
(*  en Automatique. All rights reserved. This file is distributed         *)
(*  under the terms of the Q Public License version 1.0, with the         *)
(*  change described in file LICENSE.                                     *)
(*                                                                        *)
(**************************************************************************)

(* This module transforms [Front.grammar], an abstract syntax tree for
   the grammar, into an internal representation of the grammar that is
   more usable. *)

(* ------------------------------------------------------------------------ *)
(* Nonterminals. *)

module Nonterminal : sig

  (* The type of nonterminals. *)

  type t

  (* The number of nonterminals. This includes the extra nonterminals
     that are internally generated for the grammar's entry points. *)

  val n: int

  (* This produces a string representation of a nonterminal. It should
     in principle never be applied to one of the internally generated
     nonterminals, as we do not wish users to become aware of the
     existence of these extra nonterminals. However, we do sometimes
     violate this rule when it is difficult to do otherwise.

     The Boolean parameter tells whether the string representation
     should be normalized, that is, whether parentheses and commas
     should be eliminated. This is necessary if the string is intended
     for use as a valid nonterminal name or as a valid Objective Caml
     identifier. *)

  val print: bool -> t -> string

  (* This is the Objective Caml type associated with a nonterminal
     symbol. It is known only if a %type declaration was provided.
     This function is not applicable to the internally generated
     nonterminals. *)

  val ocamltype: t -> Stretch.ocamltype option

  (* Iteration over nonterminals. *)

  val iter: (t -> unit) -> unit
  val fold: (t -> 'a -> 'a) -> 'a -> 'a

  (* Iteration over all nonterminals, except the start nonterminals. *)

  val foldx: (t -> 'a -> 'a) -> 'a -> 'a  

  (* Tabulation of a function over nonterminals. *)

  val tabulate: (t -> 'a) -> (t -> 'a)

  (* [positions nt] is a list of the positions associated with the
     definition of [nt]. There can be more than one position because
     definitions can be split over multiple files. *)

  val positions: t -> Positions.t list

end

(* ------------------------------------------------------------------------ *)
(* Terminals. *)

module Terminal : sig

  (* The type of terminals. *)

  type t

  (* The number of terminals. This includes the two pseudo-tokens
     [#] and [error]. *)

  val n: int

  (* Comparison. *)

  val equal: t -> t -> bool

  (* This produces a string representation of a terminal. *)

  val print: t -> string

  (* This is the Objective Caml type associated with a terminal
     symbol. It is known only if the %token declaration was
     accompanied with a type. *)

  val ocamltype: t -> Stretch.ocamltype option

  (* These are the two pseudo-tokens [#] and [error]. The former is
     used to denote the end of the token stream. The latter is
     accessible to the user and is used for handling errors. *)

  val sharp: t
  val error: t

  (* This is the programmer-defined [EOF] token, if there is one. It
     is recognized based solely on its name, which is fragile, but
     this behavior is documented. This token is assumed to represent
     [ocamllex]'s [eof] pattern. It is used only in emitting warnings
     in [--error-recovery] mode. *)

  val eof: t option

  (* This returns [true] if and only if the token at hand is one of
     [#] or [error]. *)

  val pseudo: t -> bool

  (* Iteration over terminals. *)

  val iter: (t -> unit) -> unit
  val fold: (t -> 'a -> 'a) -> 'a -> 'a

end

(* ------------------------------------------------------------------------ *)
(* Sets and maps over terminals. *)

module TerminalSet : sig

  (* All of the operations documented in [GSet] are available. *)

  include GSet.S with type element = Terminal.t

  (* This offers a string representation of a set of terminals. The
     symbols are simply listed one after the other and separated with
     spaces. *)

  val print: t -> string

  (* This is the set of all terminal symbols except the pseudo-tokens
     [#] and [error]. *)

  val universe: t

end

(* All of the operations documented in [GMap] are available. *)

module TerminalMap : GMap.S with type key = Terminal.t

(* ------------------------------------------------------------------------ *)
(* Symbols. *)

module Symbol : sig

  (* A symbol is either a nonterminal or a terminal. *)

  type t =
    | N of Nonterminal.t
    | T of Terminal.t

  (* Comparison. *)

  val equal: t -> t -> bool
  val lequal: t list -> t list -> bool

  (* These produce a string representation of a symbol, of a list of
     symbols, or of an array of symbols. The symbols are simply listed
     one after the other and separated with spaces. [printao] prints
     an array of symbols, starting at a particular offset. [printaod]
     is analogous, but can also print a single dot at a particular
     position between two symbols. *)

  val print: t -> string
  val printl: t list -> string
  val printa: t array -> string
  val printao: int -> t array -> string
  val printaod: int -> int -> t array -> string

end

(* ------------------------------------------------------------------------ *)
(* Sets and maps over symbols. *)

(* All of the operations documented in [Set] are available. *)

module SymbolSet : Set.S with type elt = Symbol.t

module SymbolMap : sig

  (* All of the operations documented in [Map] are available. *)

  include Map.S with type key = Symbol.t

  (* This returns [true] if and only if all of the symbols in
     the domain of the map at hand are nonterminals. *)

  val purelynonterminal: 'a t -> bool

end

(* ------------------------------------------------------------------------ *)
(* Productions. *)

module Production : sig

  (* This is the type of productions. This includes user-defined
     productions as well as the internally generated productions
     associated with the start symbols. *)

  type index

  (* Productions can be converted to integers and back. This is unsafe
     and should be avoided as much as possible. This feature is
     exploited, for efficiency, in the encoding of items. *)

  val p2i: index -> int
  val i2p: int -> index

  (* The number of productions. *)

  val n: int

  (* These map a production index to the production's definition, that
     is, a nonterminal (the left-hand side) and an array of symbols
     (the right-hand side). *)

  val def: index -> Nonterminal.t * Symbol.t array
  val nt: index -> Nonterminal.t
  val rhs: index -> Symbol.t array
  val length: index -> int

  (* This maps a production index to an array of the identifiers that
     should be used for naming the semantic values of the symbols in
     the right-hand side. *)

  val identifiers: index -> Syntax.identifier array

  (* This maps a production index to an array of Boolean flag. Each
     flag tells whether the semantic value of the corresponding symbol
     is used in the semantic action. This is a conservative
     approximation: [true] means maybe, while [false] means certainly
     not. *)

  val used: index -> bool array  

  (* This maps a production index to the production's semantic action.
     This function is not applicable to a start production. *)

  val action: index -> Syntax.action

  (* [positions prod] is a list of the positions associated with
     production [prod]. This is usually a singleton list, but there
     can be more than one position for start productions when the
     definition of the corresponding start symbol is split over
     multiple files. *)

  val positions: index -> Positions.t list

  (* Iteration over all productions. *)

  val iter: (index -> unit) -> unit
  val fold: (index -> 'a -> 'a) -> 'a -> 'a

  (* Iteration over all productions, except the start productions. *)

  val iterx: (index -> unit) -> unit
  val foldx: (index -> 'a -> 'a) -> 'a -> 'a  

  (* Iteration over the start productions only. *)

  val maps: (index -> 'a) -> 'a array

  (* Iteration over the productions associated with a specific
     nonterminal. *)

  val iternt: Nonterminal.t -> (index -> unit) -> unit
  val foldnt: Nonterminal.t -> 'a -> (index -> 'a -> 'a) -> 'a

  (* This allows determining whether a production is a start
     production. If it is a start production, the start symbol that it
     is associated with is returned. If it is a regular production,
     nothing is returned. *)

  val classify: index -> Nonterminal.t option

  (* This produces a string representation of a production. It should
     never be applied to a start production, as we do not wish users
     to become aware of the existence of these extra productions. *)

  val print: index -> string

  (* Tabulation of a Boolean function over nonterminals. [tabulate f]
     returns a tabulated version of [f] as well as the number of
     productions where [f] is true. *)

  val tabulate: (index -> bool) -> (index -> bool) * int

end

(* ------------------------------------------------------------------------ *)
(* Maps over productions. *)

module ProductionMap : GMap.S with type key = Production.index

(* ------------------------------------------------------------------------ *)
(* Analysis of the grammar. *)

module Analysis : sig

  (* [nullable_first_rhs rhs i] considers the string of symbols found at
     offset [i] in the array [rhs]. It returns its NULLABLE flag as well
     as its FIRST set. The offset [i] must be contained between [0] and
     [n], where [n] is the length of [rhs], inclusive. *)

  val nullable_first_rhs: Symbol.t array -> int -> bool * TerminalSet.t

  (* [explain_first_rhs tok rhs i] explains why the token [tok] appears
     in the FIRST set for the string of symbols found at offset [i] in
     the array [rhs]. *)

  val explain_first_rhs: Terminal.t -> Symbol.t array -> int -> string

end

(* ------------------------------------------------------------------------ *)
(* Conflict resolution via precedences. *)

module Precedence : sig

  (* Shift/reduce conflicts require making a choice between shifting a
     token and reducing a production. How these choices are made is of
     no concern to the back-end, but here is a rough explanation.

     Shifting is preferred when the token has higher precedence than
     the production, or they have same precedence and the token is
     right-associative.

     Reducing is preferred when the token has lower precedence than
     the production, or they have same precedence and the token is
     left-associative.

     Neither is allowed when the token and the production have same
     precedence and the token is non-associative.

     No preference is explicitly specified when the token or the
     production has undefined precedence. In that case, the default
     choice is to prefer shifting, but a conflict will be reported. *)

  type choice =
    | ChooseShift
    | ChooseReduce
    | ChooseNeither
    | DontKnow

  val shift_reduce: Terminal.t -> Production.index -> choice

  (* Reduce/reduce conflicts require making a choice between reducing
     two distinct productions. This is done by exploiting a partial
     order on productions.

     For compatibility with ocamlyacc, this order should be total and
     should correspond to textual order when the two productions
     originate in the same source file. When they originate in
     different source files, the two productions should be
     incomparable. *)

  val reduce_reduce: Production.index -> Production.index -> Production.index option

end

(* ------------------------------------------------------------------------ *)
(* Diagnostics. *)

(* This function prints diagnostics about precedence declarations that
   are never consulted. It is called after the automaton is
   constructed. *)

val diagnostics: unit -> unit