1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134
|
(******************************************************************************)
(* *)
(* Menhir *)
(* *)
(* Copyright Inria. All rights reserved. This file is distributed under *)
(* the terms of the GNU Library General Public License version 2, with a *)
(* special exception on linking, as described in the file LICENSE. *)
(* *)
(******************************************************************************)
(* This signature defines the format of the parse tables. It is used as
an argument to [TableInterpreter.Make]. *)
module type TABLES = sig
(* This is the parser's type of tokens. *)
type token
(* This maps a token to its internal (generation-time) integer code. *)
val token2terminal: token -> int
(* This is the integer code for the error pseudo-token. *)
val error_terminal: int
(* This maps a token to its semantic value. *)
val token2value: token -> Obj.t
(* Traditionally, an LR automaton is described by two tables, namely, an
action table and a goto table. See, for instance, the Dragon book.
The action table is a two-dimensional matrix that maps a state and a
lookahead token to an action. An action is one of: shift to a certain
state, reduce a certain production, accept, or fail.
The goto table is a two-dimensional matrix that maps a state and a
non-terminal symbol to either a state or undefined. By construction, this
table is sparse: its undefined entries are never looked up. A compression
technique is free to overlap them with other entries.
In Menhir, things are slightly different. If a state has a default
reduction on token [#], then that reduction must be performed without
consulting the lookahead token. As a result, we must first determine
whether that is the case, before we can obtain a lookahead token and use it
as an index in the action table.
Thus, Menhir's tables are as follows.
A one-dimensional default reduction table maps a state to either ``no
default reduction'' (encoded as: 0) or ``by default, reduce prod''
(encoded as: 1 + prod). The action table is looked up only when there
is no default reduction. *)
val default_reduction: PackedIntArray.t
(* Menhir follows Dencker, Dürre and Heuft, who point out that, although the
action table is not sparse by nature (i.e., the error entries are
significant), it can be made sparse by first factoring out a binary error
matrix, then replacing the error entries in the action table with undefined
entries. Thus:
A two-dimensional error bitmap maps a state and a terminal to either
``fail'' (encoded as: 0) or ``do not fail'' (encoded as: 1). The action
table, which is now sparse, is looked up only in the latter case. *)
(* The error bitmap is flattened into a one-dimensional table; its width is
recorded so as to allow indexing. The table is then compressed via
[PackedIntArray]. The bit width of the resulting packed array must be
[1], so it is not explicitly recorded. *)
(* The error bitmap does not contain a column for the [#] pseudo-terminal.
Thus, its width is [Terminal.n - 1]. We exploit the fact that the integer
code assigned to [#] is greatest: the fact that the right-most column
in the bitmap is missing does not affect the code for accessing it. *)
val error: int (* width of the bitmap *) * string (* second component of [PackedIntArray.t] *)
(* A two-dimensional action table maps a state and a terminal to one of
``shift to state s and discard the current token'' (encoded as: s | 10),
``shift to state s without discarding the current token'' (encoded as: s |
11), or ``reduce prod'' (encoded as: prod | 01). *)
(* The action table is first compressed via [RowDisplacement], then packed
via [PackedIntArray]. *)
(* Like the error bitmap, the action table does not contain a column for the
[#] pseudo-terminal. *)
val action: PackedIntArray.t * PackedIntArray.t
(* A one-dimensional lhs table maps a production to its left-hand side (a
non-terminal symbol). *)
val lhs: PackedIntArray.t
(* A two-dimensional goto table maps a state and a non-terminal symbol to
either undefined (encoded as: 0) or a new state s (encoded as: 1 + s). *)
(* The goto table is first compressed via [RowDisplacement], then packed
via [PackedIntArray]. *)
val goto: PackedIntArray.t * PackedIntArray.t
(* The number of start productions. A production [prod] is a start
production if and only if [prod < start] holds. This is also the
number of start symbols. A nonterminal symbol [nt] is a start
symbol if and only if [nt < start] holds. *)
val start: int
(* A one-dimensional semantic action table maps productions to semantic
actions. The calling convention for semantic actions is described in
[EngineTypes]. This table contains ONLY NON-START PRODUCTIONS, so the
indexing is off by [start]. Be careful. *)
val semantic_action: ((int, Obj.t, token) EngineTypes.env ->
(int, Obj.t) EngineTypes.stack) array
(* The parser defines its own [Error] exception. This exception can be
raised by semantic actions and caught by the engine, and raised by the
engine towards the final user. *)
exception Error
(* The parser indicates whether to generate a trace. Generating a
trace requires two extra tables, which respectively map a
terminal symbol and a production to a string. *)
val trace: (string array * string array) option
end
|