File: InspectionTableInterpreter.ml

package info (click to toggle)
menhir 20220210%2Bds-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 4,152 kB
  • sloc: ml: 32,565; sh: 209; makefile: 134; lisp: 8
file content (306 lines) | stat: -rw-r--r-- 10,716 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
(******************************************************************************)
(*                                                                            *)
(*                                    Menhir                                  *)
(*                                                                            *)
(*   Copyright Inria. All rights reserved. This file is distributed under     *)
(*   the terms of the GNU Library General Public License version 2, with a    *)
(*   special exception on linking, as described in the file LICENSE.          *)
(*                                                                            *)
(******************************************************************************)

(* -------------------------------------------------------------------------- *)

(* The type functor. *)

module Symbols (T : sig

  type 'a terminal
  type 'a nonterminal

end) = struct

  open T

  (* This should be the only place in the whole library (and generator!)
     where these types are defined. *)

  type 'a symbol =
    | T : 'a terminal -> 'a symbol
    | N : 'a nonterminal -> 'a symbol

  type xsymbol =
    | X : 'a symbol -> xsymbol

end

(* -------------------------------------------------------------------------- *)

(* The code functor. *)

module Make
  (TT : TableFormat.TABLES)
  (IT : InspectionTableFormat.TABLES
        with type 'a lr1state = int)
  (ET : EngineTypes.TABLE
        with type terminal = int
         and type nonterminal = int
         and type semantic_value = Obj.t)
  (E : sig
     type 'a env = (ET.state, ET.semantic_value, ET.token) EngineTypes.env
   end)
= struct

  (* Including [IT] is an easy way of inheriting the definitions of the types
     [symbol] and [xsymbol]. *)

  include IT

  (* This auxiliary function decodes a packed linearized array, as created by
     [TableBackend.linearize_and_marshal1]. Here, we read a row all at once. *)

  let read_packed_linearized
    (data, entry : PackedIntArray.t * PackedIntArray.t) (i : int) : int list
  =
    LinearizedArray.read_row_via
      (PackedIntArray.get data)
      (PackedIntArray.get entry)
      i

  (* This auxiliary function decodes a symbol. The encoding was done by
     [encode_symbol] or [encode_symbol_option] in the table back-end. *)

  let decode_symbol (symbol : int) : IT.xsymbol =
    (* If [symbol] is 0, then we have no symbol. This could mean e.g.
       that the function [incoming_symbol] has been applied to an
       initial state. In principle, this cannot happen. *)
    assert (symbol > 0);
    (* The low-order bit distinguishes terminal and nonterminal symbols. *)
    let kind = symbol land 1 in
    let symbol = symbol lsr 1 in
    if kind = 0 then
      IT.terminal (symbol - 1)
    else
      IT.nonterminal symbol

  (* These auxiliary functions convert a symbol to its integer code. For speed
     and for convenience, we use an unsafe type cast. This relies on the fact
     that the data constructors of the [terminal] and [nonterminal] GADTs are
     declared in an order that reflects their internal code. In the case of
     nonterminal symbols, we add [start] to account for the presence of the
     start symbols. *)

  let n2i (nt : 'a IT.nonterminal) : int =
    let answer = TT.start + Obj.magic nt in
    (* For safety, check that the above cast produced a correct result. *)
    assert (IT.nonterminal answer = X (N nt));
    answer

  let t2i (t : 'a IT.terminal) : int =
    let answer = Obj.magic t in
    (* For safety, check that the above cast produced a correct result. *)
    assert (IT.terminal answer = X (T t));
    answer

  (* Ordering functions. *)

  let compare_terminals t1 t2 =
    (* Subtraction is safe because overflow is impossible. *)
    t2i t1 - t2i t2

  let compare_nonterminals nt1 nt2 =
    (* Subtraction is safe because overflow is impossible. *)
    n2i nt1 - n2i nt2

  let compare_symbols symbol1 symbol2 =
    match symbol1, symbol2 with
    | X (T _), X (N _) ->
        -1
    | X (N _), X (T _) ->
        1
    | X (T t1), X (T t2) ->
        compare_terminals t1 t2
    | X (N nt1), X (N nt2) ->
        compare_nonterminals nt1 nt2

  let compare_productions prod1 prod2 =
    (* Subtraction is safe because overflow is impossible. *)
    prod1 - prod2

  let compare_items (prod1, index1) (prod2, index2) =
    let c = compare_productions prod1 prod2 in
    (* Subtraction is safe because overflow is impossible. *)
    if c <> 0 then c else index1 - index2

  (* The function [incoming_symbol] goes through the tables [IT.lr0_core] and
     [IT.lr0_incoming]. This yields a representation of type [xsymbol], out of
     which we strip the [X] quantifier, so as to get a naked symbol. This last
     step is ill-typed and potentially dangerous. It is safe only because this
     function is used at type ['a lr1state -> 'a symbol], which forces an
     appropriate choice of ['a]. *)

  let incoming_symbol (s : 'a IT.lr1state) : 'a IT.symbol =
    let core = PackedIntArray.get IT.lr0_core s in
    let symbol = decode_symbol (PackedIntArray.get IT.lr0_incoming core) in
    match symbol with
    | IT.X symbol ->
        Obj.magic symbol

  (* The function [lhs] reads the table [TT.lhs] and uses [IT.nonterminal]
     to decode the symbol. *)

  let lhs prod =
    IT.nonterminal (PackedIntArray.get TT.lhs prod)

  (* The function [rhs] reads the table [IT.rhs] and uses [decode_symbol]
     to decode the symbol. *)

  let rhs prod =
    List.map decode_symbol (read_packed_linearized IT.rhs prod)

  (* The function [items] maps the LR(1) state [s] to its LR(0) core,
     then uses [core] as an index into the table [IT.lr0_items]. The
     items are then decoded by the function [export] below, which is
     essentially a copy of [Item.export]. *)

  type item =
      int * int

  let low_bits =
    10

  let low_limit =
    1 lsl low_bits

  let export t : item =
    (t lsr low_bits, t mod low_limit)

  let items s =
    (* Map [s] to its LR(0) core. *)
    let core = PackedIntArray.get IT.lr0_core s in
    (* Now use [core] to look up the table [IT.lr0_items]. *)
    List.map export (read_packed_linearized IT.lr0_items core)

  (* The function [nullable] maps the nonterminal symbol [nt] to its
     integer code, which it uses to look up the array [IT.nullable].
     This yields 0 or 1, which we map back to a Boolean result. *)

  let decode_bool i =
    assert (i = 0 || i = 1);
    i = 1

  let nullable nt =
    decode_bool (PackedIntArray.get1 IT.nullable (n2i nt))

  (* The function [first] maps the symbols [nt] and [t] to their integer
     codes, which it uses to look up the matrix [IT.first]. *)

  let first nt t =
    decode_bool (PackedIntArray.unflatten1 IT.first (n2i nt) (t2i t))

  let xfirst symbol t =
    match symbol with
    | X (T t') ->
        compare_terminals t t' = 0
    | X (N nt) ->
        first nt t

  (* The function [foreach_terminal] exploits the fact that the
     first component of [TT.error] is [Terminal.n - 1], i.e., the
     number of terminal symbols, including [error] but not [#]. *)

  let rec foldij i j f accu =
    if i = j then
      accu
    else
      foldij (i + 1) j f (f i accu)

  let foreach_terminal f accu =
    let n, _ = TT.error in
    foldij 0 n (fun i accu ->
      f (IT.terminal i) accu
    ) accu

  let foreach_terminal_but_error f accu =
    let n, _ = TT.error in
    foldij 0 n (fun i accu ->
      if i = TT.error_terminal then
        accu
      else
        f (IT.terminal i) accu
    ) accu

  (* ------------------------------------------------------------------------ *)

  (* The following is the implementation of the function [feed]. This function
     is logically part of the LR engine, so it would be nice if it were placed
     in the module [Engine], but it must be placed here because, to ensure
     type safety, its arguments must be a symbol of type ['a symbol] and a
     semantic value of type ['a]. The type ['a symbol] is not available in
     [Engine]. It is available here. *)

  open EngineTypes
  open ET
  open E

  (* [feed] fails if the current state does not have an outgoing transition
     labeled with the desired symbol. This check is carried out at runtime. *)

  let feed_failure () =
    invalid_arg "feed: outgoing transition does not exist"

  (* Feeding a nonterminal symbol [nt]. Here, [nt] has type [nonterminal],
     which is a synonym for [int], and [semv] has type [semantic_value],
     which is a synonym for [Obj.t]. This type is unsafe, because pushing
     a semantic value of arbitrary type into the stack can later cause a
     semantic action to crash and burn. The function [feed] is given a safe
     type below. *)

  let feed_nonterminal
        (nt : nonterminal) startp (semv : semantic_value) endp (env : 'b env)
      : 'b env
  =
    (* Check if the source state has an outgoing transition labeled [nt].
       This is done by consulting the [goto] table. *)
    let source = env.current in
    match ET.maybe_goto_nt source nt with
    | None ->
        feed_failure()
    | Some target ->
        (* Push a new cell onto the stack, containing the identity of the state
           that we are leaving. The semantic value [semv] and positions [startp]
           and [endp] contained in the new cell are provided by the caller. *)
        let stack = { state = source; semv; startp; endp; next = env.stack } in
        (* Move to the target state. *)
        { env with stack; current = target }

  let reduce   _env _prod = feed_failure()
  let initiate _env       = feed_failure()

  let feed_terminal
        (terminal : terminal) startp (semv : semantic_value) endp (env : 'b env)
      : 'b env
  =
    (* Check if the source state has an outgoing transition labeled [terminal].
       This is done by consulting the [action] table. *)
    let source = env.current in
    ET.action source terminal semv
      (fun env _please_discard _terminal semv target ->
        (* There is indeed a transition toward the state [target].
           Push a new cell onto the stack and move to the target state. *)
        let stack = { state = source; semv; startp; endp; next = env.stack } in
        { env with stack; current = target }
      ) reduce initiate env

  (* The type assigned to [feed] ensures that the type of the semantic value
     [semv] is appropriate: it must be the semantic-value type of the symbol
     [symbol]. *)

  let feed (symbol : 'a symbol) startp (semv : 'a) endp env =
    let semv : semantic_value = Obj.repr semv in
    match symbol with
    | N nt ->
        feed_nonterminal (n2i nt) startp semv endp env
    | T terminal ->
        feed_terminal (t2i terminal) startp semv endp env

end