File: Engine.ml

package info (click to toggle)
menhir 20220210%2Bds-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 4,152 kB
  • sloc: ml: 32,565; sh: 209; makefile: 134; lisp: 8
file content (944 lines) | stat: -rw-r--r-- 37,947 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
(******************************************************************************)
(*                                                                            *)
(*                                    Menhir                                  *)
(*                                                                            *)
(*   Copyright Inria. All rights reserved. This file is distributed under     *)
(*   the terms of the GNU Library General Public License version 2, with a    *)
(*   special exception on linking, as described in the file LICENSE.          *)
(*                                                                            *)
(******************************************************************************)

type position = Lexing.position
open EngineTypes

(* The LR parsing engine. *)

(* This module is used:

   - at compile time, if so requested by the user, via the --interpret options;
   - at run time, in the table-based back-end. *)

module Make (T : TABLE) = struct

  (* This propagates type and exception definitions. The functions [number],
     [production_index], [find_production], too, are defined by this [include]
     declaration. *)

  include T

  type 'a env =
      (state, semantic_value, token) EngineTypes.env

  (* ------------------------------------------------------------------------ *)

  (* The type [checkpoint] represents an intermediate or final result of the
     parser. See [EngineTypes]. *)

  (* The type [checkpoint] is presented to the user as a private type (see
     [IncrementalEngine]). This prevents the user from manufacturing
     checkpoints (i.e., continuations) that do not make sense. (Such
     continuations could potentially violate the LR invariant and lead to
     crashes.) *)

  (* 2017/03/29 Although [checkpoint] is a private type, we now expose a
     constructor function, [input_needed]. This function allows manufacturing
     a checkpoint out of an environment. For this reason, the type [env] must
     also be parameterized with ['a]. *)

  type 'a checkpoint =
    | InputNeeded of 'a env
    | Shifting of 'a env * 'a env * bool
    | AboutToReduce of 'a env * production
    | HandlingError of 'a env
    | Accepted of 'a
    | Rejected

  (* ------------------------------------------------------------------------ *)

  (* As of 2020/12/16, we introduce a choice between multiple error handling
     strategies. *)

  (* Regardless of the strategy, when a syntax error is encountered, the
     function [initiate] is called, a [HandlingError] checkpoint is produced,
     and (after resuming) the function [error] is called. This function checks
     whether the current state allows shifting, reducing, or neither, when the
     lookahead token is [error]. Its behavior, then, depends on the strategy,
     as follows. *)

  (* In the legacy strategy, which until now was the only strategy,

     - If shifting is possible, then a [Shifting] checkpoint is produced,
       whose field [please_discard] is [true], so (after resuming) an
       [InputNeeded] checkpoint is produced, and (after a new token
       has been provided) the parser leaves error-handling mode and
       returns to normal mode.

     - If reducing is possible, then one or more reductions are performed.
       Default reductions are announced via [AboutToReduce] checkpoints,
       whereas ordinary reductions are performed silently. (It is unclear
       why this is so.) The parser remains in error-handling mode, so
       another [HandlingError] checkpoint is produced, and the function
       [error] is called again.

     - If neither action is possible and if the stack is nonempty, then a
       cell is popped off the stack, then a [HandlingError] checkpoint is
       produced, and the function [error] is called again.

     - If neither action is possible and if the stack is empty, then the
       parse dies with a [Reject] checkpoint. *)

  (* The simplified strategy differs from the legacy strategy as follows:

     - When shifting, a [Shifting] checkpoint is produced, whose field
       [please_discard] is [false], so the parser does not request another
       token, and the parser remains in error-handling mode. (If the
       destination state of this shift transition has a default reduction,
       then the parser will perform this reduction as its next step.)

     - When reducing, all reductions are announced by [AboutToReduce]
       checkpoints.

     - If neither shifting [error] nor reducing on [error] is possible,
       then the parser dies with a [Reject] checkpoint. (The parser does
       not attempt to pop cells off the stack one by one.)

     This simplified strategy is appropriate when the grammar uses the [error]
     token in a limited way, where the [error] token always appears at the end
     of a production whose semantic action raises an exception (whose purpose
     is to signal a syntax error and perhaps produce a custom message). Then,
     the parser must not request one token past the syntax error. (In a REPL,
     that would be undesirable.) It must perform as many reductions on [error]
     as possible, then (if possible) shift the [error] token and move to a new
     state where a default reduction will be possible. (Because the [error]
     token always appears at the end of a production, no other action can
     exist in that state, so a default reduction must exist.) The semantic
     action raises an exception, and that is it. *)

  (* Let us note that it is also possible to perform no error handling at
     all, or to perform customized error handling, by stopping as soon as
     the first [ErrorHandling] checkpoint appears. *)

  type strategy =
    [ `Legacy | `Simplified ]

  (* ------------------------------------------------------------------------ *)

  (* In the code-based back-end, the [run] function is sometimes responsible
     for pushing a new cell on the stack. This is motivated by code sharing
     concerns. In this interpreter, there is no such concern; [run]'s caller
     is always responsible for updating the stack. *)

  (* In the code-based back-end, there is a [run] function for each state
     [s]. This function can behave in two slightly different ways, depending
     on when it is invoked, or (equivalently) depending on [s].

     If [run] is invoked after shifting a terminal symbol (or, equivalently,
     if [s] has a terminal incoming symbol), then [run] discards a token,
     unless [s] has a default reduction on [#]. (Indeed, in that case,
     requesting the next token might drive the lexer off the end of the input
     stream.)

     If, on the other hand, [run] is invoked after performing a goto
     transition, or invoked directly by an entry point, then there is nothing
     to discard.

     These two cases are reflected in [CodeBackend.gettoken].

     Here, the code is structured in a slightly different way. It is up to the
     caller of [run] to indicate whether to discard a token, via the parameter
     [please_discard]. This flag is set when [s] is being entered by shifting
     a terminal symbol and [s] does not have a default reduction on [#]. *)

  (* The following recursive group of functions are tail recursive, produce a
     checkpoint of type [semantic_value checkpoint], and cannot raise an
     exception. *)

  let rec run env please_discard : semantic_value checkpoint =

    (* Log the fact that we just entered this state. *)

    if log then
      Log.state env.current;

    (* If [please_discard] is set, we discard the current lookahead token and
       fetch the next one. In order to request a token from the user, we
       return an [InputNeeded] continuation, which, when invoked by the user,
       will take us to [discard]. If [please_discard] is not set, we skip this
       step and jump directly to [check_for_default_reduction]. *)

    if please_discard then
      InputNeeded env
    else
      check_for_default_reduction env

  (* [discard env triple] stores [triple] into [env], overwriting the previous
     token. It is invoked by [offer], which itself is invoked by the user in
     response to an [InputNeeded] checkpoint. *)

  and discard env triple =
    if log then begin
      let (token, startp, endp) = triple in
      Log.lookahead_token (T.token2terminal token) startp endp
    end;
    let env = { env with error = false; triple } in
    check_for_default_reduction env

  and check_for_default_reduction env =

    (* Examine what situation we are in. This case analysis is analogous to
       that performed in [CodeBackend.gettoken], in the sub-case where we do
       not have a terminal incoming symbol. *)

    T.default_reduction
      env.current
      announce_reduce       (* there is a default reduction; perform it *)
      check_for_error_token (* there is none; continue below *)
      env

  and check_for_error_token env =

    (* There is no default reduction. Consult the current lookahead token
       so as to determine which action should be taken. *)

    (* Peeking at the first input token, without taking it off the input
       stream, is done by reading [env.triple]. We are careful to first
       check [env.error]. *)

    (* Note that, if [please_discard] was true, then we have just called
       [discard], so the lookahead token cannot be [error]. *)

    (* Returning [HandlingError env] is like calling [error ~strategy env]
       directly, except it allows the user to regain control and choose an
       error-handling strategy. *)

    if env.error then begin
      if log then
        Log.resuming_error_handling();
      HandlingError env
    end
    else
      let (token, _, _) = env.triple in

      (* We consult the two-dimensional action table, indexed by the
         current state and the current lookahead token, in order to
         determine which action should be taken. *)

      T.action
        env.current                    (* determines a row *)
        (T.token2terminal token)       (* determines a column *)
        (T.token2value token)
        shift                          (* shift continuation *)
        announce_reduce                (* reduce continuation *)
        initiate                       (* failure continuation *)
        env

  (* ------------------------------------------------------------------------ *)

  (* This function takes care of shift transitions along a terminal symbol.
     (Goto transitions are taken care of within [reduce] below.) The symbol
     can be either an actual token or the [error] pseudo-token. *)

  (* Here, the lookahead token CAN be [error]. *)

  and shift env
      (please_discard : bool)
      (terminal : terminal)
      (value : semantic_value)
      (s' : state) =

    (* Log the transition. *)

    if log then
      Log.shift terminal s';

    (* Push a new cell onto the stack, containing the identity of the
       state that we are leaving. *)

    let (_, startp, endp) = env.triple in
    let stack = {
      state = env.current;
      semv = value;
      startp;
      endp;
      next = env.stack;
    } in

    (* Switch to state [s']. *)

    let new_env = { env with stack; current = s' } in

    (* Expose the transition to the user. (In principle, we have a choice
       between exposing the transition before we take it, after we take
       it, or at some point in between. This affects the number and type
       of the parameters carried by [Shifting]. Here, we choose to expose
       the transition after we take it; this allows [Shifting] to carry
       only three parameters, whose meaning is simple.) *)

    Shifting (env, new_env, please_discard)

  (* ------------------------------------------------------------------------ *)

  (* The function [announce_reduce] stops the parser and returns a checkpoint
     which allows the parser to be resumed by calling [reduce]. *)

  (* Only ordinary productions are exposed to the user. Start productions
     are not exposed to the user. Reducing a start production simply leads
     to the successful termination of the parser. *)

  and announce_reduce env (prod : production) =
    if T.is_start prod then
      accept env prod
    else
      AboutToReduce (env, prod)

  (* The function [reduce] takes care of reductions. It is invoked by
     [resume] after an [AboutToReduce] event has been produced. *)

  (* Here, the lookahead token CAN be [error]. *)

  (* The production [prod] CANNOT be a start production. *)

  and reduce env (prod : production) =

    (* Log a reduction event. *)

    if log then
      Log.reduce_or_accept prod;

    (* Invoke the semantic action. The semantic action is responsible for
       truncating the stack and pushing a new cell onto the stack, which
       contains a new semantic value. The semantic action returns a new stack,
       which becomes the current stack. *)

    let stack = T.semantic_action prod env in

    (* By our convention, the semantic action has produced an updated
       stack. The state now found in the top stack cell is the return
       state. *)

    (* Perform a goto transition. The target state is determined
       by consulting the goto table at the return state and at
       production [prod]. *)

    let current = T.goto_prod stack.state prod in
    let env = { env with stack; current } in
    run env false

  and accept env prod =
    (* Log an accept event. *)
    if log then
      Log.reduce_or_accept prod;
    (* Extract the semantic value out of the stack. *)
    let v = env.stack.semv in
    (* Finish. *)
    Accepted v

  (* ------------------------------------------------------------------------ *)

  (* The following functions deal with errors. *)

  (* [initiate] initiates or resumes error handling. *)

  (* Here, the lookahead token CAN be [error]. *)

  and initiate env =
    if log then
      Log.initiating_error_handling();
    let env = { env with error = true } in
    HandlingError env

  (* [error] handles errors. *)

  and error ~strategy env =
    assert env.error;

    (* Consult the column associated with the [error] pseudo-token in the
       action table. *)

    T.action
      env.current                    (* determines a row *)
      T.error_terminal               (* determines a column *)
      T.error_value
      (error_shift ~strategy)        (* shift continuation *)
      (error_reduce ~strategy)       (* reduce continuation *)
      (error_fail ~strategy)         (* failure continuation *)
      env

  and error_shift ~strategy env please_discard terminal value s' =
    assert (terminal = T.error_terminal && value = T.error_value);

    (* This state is capable of shifting the [error] token. *)

    if log then
      Log.handling_error env.current;

    (* In the simplified strategy, we change [please_discard] to [false],
       which means that we won't request the next token and (therefore)
       we will remain in error-handling mode after shifting the [error]
       token. *)

    let please_discard =
      match strategy with `Legacy -> please_discard | `Simplified -> false
    in

    shift env please_discard terminal value s'

  and error_reduce ~strategy env prod =

    (* This state is capable of performing a reduction on [error]. *)

    if log then
      Log.handling_error env.current;

    (* In the legacy strategy, we call [reduce] instead of [announce_reduce],
       apparently in an attempt to hide the reduction steps performed during
       error handling. In the simplified strategy, all reductions steps are
       announced. *)

    match strategy with
    | `Legacy ->
        reduce env prod
    | `Simplified ->
        announce_reduce env prod

  and error_fail ~strategy env =

    (* This state is unable to handle errors. In the simplified strategy, we
       die immediately. In the legacy strategy, we attempt to pop a stack
       cell. (This amounts to forgetting part of what we have just read, in
       the hope of reaching a state where we can shift the [error] token and
       resume parsing in normal mode. Forgetting past input is not appropriate
       when the goal is merely to produce a good syntax error message.) *)

    match strategy with
    | `Simplified ->
        Rejected
    | `Legacy ->

    (* Attempt to pop a stack cell. *)

    let cell = env.stack in
    let next = cell.next in
    if next == cell then

      (* The stack is empty. Die. *)

      Rejected

    else begin

      (* The stack is nonempty. Pop a cell, updating the current state
         to the state [cell.state] found in the popped cell, and continue
         error handling there. *)

      (* I note that if the new state [cell.state] has a default reduction,
         then it is ignored. It is unclear whether this is intentional. It
         could be a good thing, as it avoids a scenario where the parser
         diverges by repeatedly popping, performing a default reduction of
         an epsilon production, popping, etc. Still, the question of whether
         to obey default reductions while error handling seems obscure. *)

      let env = { env with
        stack = next;
        current = cell.state
      } in
      HandlingError env

    end

  (* End of the nest of tail recursive functions. *)

  (* ------------------------------------------------------------------------ *)
  (* ------------------------------------------------------------------------ *)

  (* The incremental interface. See [EngineTypes]. *)

  (* [start s] begins the parsing process. *)

  let start (s : state) (initial : position) : semantic_value checkpoint =

    (* Build an empty stack. This is a dummy cell, which is its own successor.
       Its [next] field WILL be accessed by [error_fail] if an error occurs and
       is propagated all the way until the stack is empty. Its [endp] field WILL
       be accessed (by a semantic action) if an epsilon production is reduced
       when the stack is empty. *)

    let rec empty = {
      state = s;                          (* dummy *)
      semv = T.error_value;               (* dummy *)
      startp = initial;                   (* dummy *)
      endp = initial;
      next = empty;
    } in

    (* Build an initial environment. *)

    (* Unfortunately, there is no type-safe way of constructing a
       dummy token. Tokens carry semantic values, which in general
       we cannot manufacture. This instance of [Obj.magic] could
       be avoided by adopting a different representation (e.g., no
       [env.error] field, and an option in the first component of
       [env.triple]), but I like this representation better. *)

    let dummy_token = Obj.magic () in
    let env = {
      error = false;
      triple = (dummy_token, initial, initial); (* dummy *)
      stack = empty;
      current = s;
    } in

    (* Begin parsing. *)

    (* The parameter [please_discard] here is [true], which means we know
       that we must read at least one token. This claim relies on the fact
       that we have ruled out the two special cases where a start symbol
       recognizes the empty language or the singleton language {epsilon}. *)

    run env true

  (* [offer checkpoint triple] is invoked by the user in response to a
     checkpoint of the form [InputNeeded env]. It checks that [checkpoint] is
     indeed of this form, and invokes [discard]. *)

  (* [resume checkpoint] is invoked by the user in response to a checkpoint of
     the form [AboutToReduce (env, prod)] or [HandlingError env]. It checks
     that [checkpoint] is indeed of this form, and invokes [reduce] or
     [error], as appropriate. *)

  (* In reality, [offer] and [resume] accept an argument of type
     [semantic_value checkpoint] and produce a checkpoint of the same type.
     The choice of [semantic_value] is forced by the fact that this is the
     parameter of the checkpoint [Accepted]. *)

  (* We change this as follows. *)

  (* We change the argument and result type of [offer] and [resume] from
     [semantic_value checkpoint] to ['a checkpoint]. This is safe, in this
     case, because we give the user access to values of type [t checkpoint]
     only if [t] is indeed the type of the eventual semantic value for this
     run. (More precisely, by examining the signatures [INCREMENTAL_ENGINE]
     and [INCREMENTAL_ENGINE_START], one finds that the user can build a value
     of type ['a checkpoint] only if ['a] is [semantic_value]. The table
     back-end goes further than this and produces versions of [start] composed
     with a suitable cast, which give the user access to a value of type
     [t checkpoint] where [t] is the type of the start symbol.) *)

  let offer : 'a . 'a checkpoint ->
                   token * position * position ->
                   'a checkpoint
  = function
    | InputNeeded env ->
        Obj.magic discard env
    | _ ->
        invalid_arg "offer expects InputNeeded"

  let resume : 'a . ?strategy:strategy -> 'a checkpoint -> 'a checkpoint =
  fun ?(strategy=`Legacy) checkpoint ->
    match checkpoint with
    | HandlingError env ->
        Obj.magic error ~strategy env
    | Shifting (_, env, please_discard) ->
        Obj.magic run env please_discard
    | AboutToReduce (env, prod) ->
        Obj.magic reduce env prod
    | _ ->
        invalid_arg "resume expects HandlingError | Shifting | AboutToReduce"

  (* ------------------------------------------------------------------------ *)
  (* ------------------------------------------------------------------------ *)

  (* The traditional interface. See [EngineTypes]. *)

  (* ------------------------------------------------------------------------ *)

  (* Wrapping a lexer and lexbuf as a token supplier. *)

  type supplier =
    unit -> token * position * position

  let lexer_lexbuf_to_supplier
      (lexer : Lexing.lexbuf -> token)
      (lexbuf : Lexing.lexbuf)
  : supplier =
    fun () ->
      let token = lexer lexbuf in
      let startp = lexbuf.Lexing.lex_start_p
      and endp = lexbuf.Lexing.lex_curr_p in
      token, startp, endp

  (* ------------------------------------------------------------------------ *)

  (* The main loop repeatedly handles intermediate checkpoints, until a final
     checkpoint is obtained. This allows implementing the monolithic interface
     ([entry]) in terms of the incremental interface ([start], [offer],
     [handle], [reduce]). *)

  (* By convention, acceptance is reported by returning a semantic value,
     whereas rejection is reported by raising [Error]. *)

  (* [loop] is polymorphic in ['a]. No cheating is involved in achieving this.
     All of the cheating resides in the types assigned to [offer] and [handle]
     above. *)

  let rec loop : 'a . ?strategy:strategy -> supplier -> 'a checkpoint -> 'a =
    fun ?(strategy=`Legacy) read checkpoint ->
    match checkpoint with
    | InputNeeded _ ->
        (* The parser needs a token. Request one from the lexer,
           and offer it to the parser, which will produce a new
           checkpoint. Then, repeat. *)
        let triple = read() in
        let checkpoint = offer checkpoint triple in
        loop ~strategy read checkpoint
    | Shifting _
    | AboutToReduce _
    | HandlingError _ ->
        (* The parser has suspended itself, but does not need
           new input. Just resume the parser. Then, repeat. *)
        let checkpoint = resume ~strategy checkpoint in
        loop ~strategy read checkpoint
    | Accepted v ->
        (* The parser has succeeded and produced a semantic value.
           Return this semantic value to the user. *)
        v
    | Rejected ->
        (* The parser rejects this input. Raise an exception. *)
        raise Error

  let entry strategy (s : state) lexer lexbuf : semantic_value =
    let initial = lexbuf.Lexing.lex_curr_p in
    loop ~strategy (lexer_lexbuf_to_supplier lexer lexbuf) (start s initial)

  (* ------------------------------------------------------------------------ *)

  (* [loop_handle] stops if it encounters an error, and at this point, invokes
     its failure continuation, without letting Menhir do its own traditional
     error-handling (which involves popping the stack, etc.). *)

  let rec loop_handle succeed fail read checkpoint =
    match checkpoint with
    | InputNeeded _ ->
        let triple = read() in
        let checkpoint = offer checkpoint triple in
        loop_handle succeed fail read checkpoint
    | Shifting _
    | AboutToReduce _ ->
        (* Which strategy is passed to [resume] here is irrelevant,
           since this checkpoint is not [HandlingError _]. *)
        let checkpoint = resume checkpoint in
        loop_handle succeed fail read checkpoint
    | HandlingError _
    | Rejected ->
        (* The parser has detected an error. Invoke the failure continuation. *)
        fail checkpoint
    | Accepted v ->
        (* The parser has succeeded and produced a semantic value. Invoke the
           success continuation. *)
        succeed v

  (* ------------------------------------------------------------------------ *)

  (* [loop_handle_undo] is analogous to [loop_handle], except it passes a pair
     of checkpoints to the failure continuation.

     The first (and oldest) checkpoint is the last [InputNeeded] checkpoint
     that was encountered before the error was detected. The second (and
     newest) checkpoint is where the error was detected, as in [loop_handle].
     Going back to the first checkpoint can be thought of as undoing any
     reductions that were performed after seeing the problematic token. (These
     reductions must be default reductions or spurious reductions.) *)

  let rec loop_handle_undo succeed fail read (inputneeded, checkpoint) =
    match checkpoint with
    | InputNeeded _ ->
        (* Update the last recorded [InputNeeded] checkpoint. *)
        let inputneeded = checkpoint in
        let triple = read() in
        let checkpoint = offer checkpoint triple in
        loop_handle_undo succeed fail read (inputneeded, checkpoint)
    | Shifting _
    | AboutToReduce _ ->
        (* Which strategy is passed to [resume] here is irrelevant,
           since this checkpoint is not [HandlingError _]. *)
        let checkpoint = resume checkpoint in
        loop_handle_undo succeed fail read (inputneeded, checkpoint)
    | HandlingError _
    | Rejected ->
        fail inputneeded checkpoint
    | Accepted v ->
        succeed v

  (* For simplicity, we publish a version of [loop_handle_undo] that takes a
     single checkpoint as an argument, instead of a pair of checkpoints. We
     check that the argument is [InputNeeded _], and duplicate it. *)

  (* The parser cannot accept or reject before it asks for the very first
     character of input. (Indeed, we statically reject a symbol that
     generates the empty language or the singleton language {epsilon}.)
     So, the [start] checkpoint must match [InputNeeded _]. Hence, it is
     permitted to call [loop_handle_undo] with a [start] checkpoint. *)

  let loop_handle_undo succeed fail read checkpoint =
    assert (match checkpoint with InputNeeded _ -> true | _ -> false);
    loop_handle_undo succeed fail read (checkpoint, checkpoint)

  (* ------------------------------------------------------------------------ *)

  let rec shifts checkpoint =
    match checkpoint with
    | Shifting (env, _, _) ->
        (* The parser is about to shift, which means it is willing to
           consume the terminal symbol that we have fed it. Return the
           state just before this transition. *)
        Some env
    | AboutToReduce _ ->
        (* The parser wishes to reduce. Just follow. *)
        (* Which strategy is passed to [resume] here is irrelevant,
           since this checkpoint is not [HandlingError _]. *)
        shifts (resume checkpoint)
    | HandlingError _ ->
        (* The parser fails, which means it rejects the terminal symbol
           that we have fed it. *)
        None
    | InputNeeded _
    | Accepted _
    | Rejected ->
        (* None of these cases can arise. Indeed, after a token is submitted
           to it, the parser must shift, reduce, or signal an error, before
           it can request another token or terminate. *)
        assert false

  let acceptable checkpoint token pos =
    let triple = (token, pos, pos) in
    let checkpoint = offer checkpoint triple in
    match shifts checkpoint with
    | None      -> false
    | Some _env -> true

  (* ------------------------------------------------------------------------ *)

  (* The type ['a lr1state] describes the (non-initial) states of the LR(1)
     automaton. The index ['a] represents the type of the semantic value
     associated with the state's incoming symbol. *)

  (* The type ['a lr1state] is defined as an alias for [state], which itself
     is usually defined as [int] (see [TableInterpreter]). So, ['a lr1state]
     is technically a phantom type, but should really be thought of as a GADT
     whose data constructors happen to be represented as integers. It is
     presented to the user as an abstract type (see [IncrementalEngine]). *)

  type 'a lr1state =
      state

  (* ------------------------------------------------------------------------ *)

  (* Stack inspection. *)

  (* We offer a read-only view of the parser's state as a stream of elements.
     Each element contains a pair of a (non-initial) state and a semantic
     value associated with (the incoming symbol of) this state. Note that the
     type [element] is an existential type. *)

  (* As of 2017/03/31, the type [stack] and the function [stack] are DEPRECATED.
     If desired, they could now be implemented outside Menhir, by relying on
     the functions [top] and [pop]. *)

  type element =
    | Element: 'a lr1state * 'a * position * position -> element

  open General

  type stack =
    element stream

  (* If [current] is the current state and [cell] is the top stack cell,
     then [stack cell current] is a view of the parser's state as a stream
     of elements. *)

  let rec stack cell current : element stream =
    lazy (
      (* The stack is empty iff the top stack cell is its own successor. In
         that case, the current state [current] should be an initial state
         (which has no incoming symbol).
         We do not allow the user to inspect this state. *)
      let next = cell.next in
      if next == cell then
        Nil
      else
        (* Construct an element containing the current state [current] as well
           as the semantic value contained in the top stack cell. This semantic
           value is associated with the incoming symbol of this state, so it
           makes sense to pair them together. The state has type ['a state] and
           the semantic value has type ['a], for some type ['a]. Here, the OCaml
           type-checker thinks ['a] is [semantic_value] and considers this code
           well-typed. Outside, we will use magic to provide the user with a way
           of inspecting states and recovering the value of ['a]. *)
        let element = Element (
          current,
          cell.semv,
          cell.startp,
          cell.endp
        ) in
        Cons (element, stack next cell.state)
    )

  let stack env : element stream =
    stack env.stack env.current

  (* As explained above, the function [top] allows access to the top stack
     element only if the stack is nonempty, i.e., only if the current state
     is not an initial state. *)

  let top env : element option =
    let cell = env.stack in
    let next = cell.next in
    if next == cell then
      None
    else
      Some (Element (env.current, cell.semv, cell.startp, cell.endp))

  (* [equal] compares the stacks for physical equality, and compares the
     current states via their numbers (this seems cleaner than using OCaml's
     polymorphic equality). *)

  (* The two fields that are not compared by [equal], namely [error] and
     [triple], are overwritten by the function [discard], which handles
     [InputNeeded] checkpoints. Thus, if [equal env1 env2] holds, then the
     checkpoints [input_needed env1] and [input_needed env2] are
     equivalent: they lead the parser to behave in the same way. *)

  let equal env1 env2 =
    env1.stack == env2.stack &&
    number env1.current = number env2.current

  let current_state_number env =
    number env.current

  (* ------------------------------------------------------------------------ *)

  (* Access to the position of the lookahead token. *)

  let positions { triple = (_, startp, endp); _ } =
    startp, endp

  (* ------------------------------------------------------------------------ *)

  (* Access to information about default reductions. *)

  (* This can be a function of states, or a function of environments.
     We offer both. *)

  (* Instead of a Boolean result, we could return a [production option].
     However, we would have to explicitly test whether [prod] is a start
     production, and in that case, return [None], I suppose. Indeed, we
     have decided not to expose the start productions. *)

  let state_has_default_reduction (state : _ lr1state) : bool =
    T.default_reduction state
      (fun _env _prod -> true)
      (fun _env -> false)
      ()

  let env_has_default_reduction env =
    state_has_default_reduction env.current

  (* ------------------------------------------------------------------------ *)

  (* The following functions work at the level of environments (as opposed to
     checkpoints). The function [pop] causes the automaton to go back into the
     past, pretending that the last input symbol has never been read. The
     function [force_reduction] causes the automaton to re-interpret the past,
     by recognizing the right-hand side of a production and reducing this
     production. The function [feed] causes the automaton to progress into the
     future by pretending that a (terminal or nonterminal) symbol has been
     read. *)

  (* The function [feed] would ideally be defined here. However, for this
     function to be type-safe, the GADT ['a symbol] is needed. For this
     reason, we move its definition to [InspectionTableInterpreter], where
     the inspection API is available. *)

  (* [pop] pops one stack cell. It cannot go wrong. *)

  let pop (env : 'a env) : 'a env option =
    let cell = env.stack in
    let next = cell.next in
    if next == cell then
      (* The stack is empty. *)
      None
    else
      (* The stack is nonempty. Pop off one cell. *)
      Some { env with stack = next; current = cell.state }

  (* [force_reduction] is analogous to [reduce], except that it does not
     continue by calling [run env] or [initiate env]. Instead, it returns
     [env] to the user. *)

  (* [force_reduction] is dangerous insofar as it executes a semantic action.
     This semantic action could have side effects: nontermination, state,
     exceptions, input/output, etc. *)

  let force_reduction prod (env : 'a env) : 'a env =
    (* Check if this reduction is permitted. This check is REALLY important.
       The stack must have the correct shape: that is, it must be sufficiently
       high, and must contain semantic values of appropriate types, otherwise
       the semantic action will crash and burn. *)
    (* We currently check whether the current state is WILLING to reduce this
       production (i.e., there is a reduction action in the action table row
       associated with this state), whereas it would be more liberal to check
       whether this state is CAPABLE of reducing this production (i.e., the
       stack has an appropriate shape). We currently have no means of
       performing such a check. *)
    if not (T.may_reduce env.current prod) then
      invalid_arg "force_reduction: this reduction is not permitted in this state"
    else begin
      (* We do not expose the start productions to the user, so this cannot be
         a start production. Hence, it has a semantic action. *)
      assert (not (T.is_start prod));
      (* Invoke the semantic action. *)
      let stack = T.semantic_action prod env in
      (* Perform a goto transition. *)
      let current = T.goto_prod stack.state prod in
      { env with stack; current }
    end

  (* The environment manipulation functions -- [pop] and [force_reduction]
     above, plus [feed] -- manipulate the automaton's stack and current state,
     but do not affect the automaton's lookahead symbol. When the function
     [input_needed] is used to go back from an environment to a checkpoint
     (and therefore, resume normal parsing), the lookahead symbol is clobbered
     anyway, since the only action that the user can take is to call [offer].
     So far, so good. One problem, though, is that this call to [offer] may
     well place the automaton in a configuration of a state [s] and a
     lookahead symbol [t] that is normally unreachable. Also, perhaps the
     state [s] is a state where an input symbol normally is never demanded, so
     this [InputNeeded] checkpoint is fishy. There does not seem to be a deep
     problem here, but, when programming an error recovery strategy, one
     should pay some attention to this issue. Ideally, perhaps, one should use
     [input_needed] only in a state [s] where an input symbol is normally
     demanded, that is, a state [s] whose incoming symbol is a terminal symbol
     and which does not have a default reduction on [#]. *)

  let input_needed (env : 'a env) : 'a checkpoint =
    InputNeeded env

  (* The following functions are compositions of [top] and [pop]. *)

  let rec pop_many i env =
    if i = 0 then
      Some env
    else match pop env with
    | None ->
        None
    | Some env ->
        pop_many (i - 1) env

  let get i env =
    match pop_many i env with
    | None ->
        None
    | Some env ->
        top env

end