File: cf_parser.mli

package info (click to toggle)
pagodacf 0.10-1
  • links: PTS, VCS
  • area: main
  • in suites: lenny
  • size: 1,204 kB
  • ctags: 2,320
  • sloc: ml: 8,458; ansic: 3,338; makefile: 171; sh: 27
file content (268 lines) | stat: -rw-r--r-- 11,592 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
(*---------------------------------------------------------------------------*
  INTERFACE  cf_parser.mli

  Copyright (c) 2002-2006, James H. Woodyatt
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:

    Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.

    Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in
    the documentation and/or other materials provided with the
    distribution

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
  OF THE POSSIBILITY OF SUCH DAMAGE. 
 *---------------------------------------------------------------------------*)

(** Functional LL(x) parsing with monadic combinators. *)

(** This module implements function left-shift/left-reduce parser combinators
    using a state-exception monad over the input stream.  To evaluate a parser
    monad is to parse an input stream.  The state monad is lifted into the
    exception monad to facilitate backtracking.  Parsers should signal errors
    in the input stream with ordinary Objective Caml exceptions.
*)

(** The parser monad.  A function that parses a sequence of input tokens.
    Returns [None] if the parser does not recognize any symbols.  Otherwise
    returns the reduced output and the remainder of the input tokens.
*)
type ('i, 'o) t = 'i Cf_seq.t -> ('o * 'i Cf_seq.t) option

(** Generic parser error with no parameters. *)
exception Error

(** A parser that never recognizes any input, i.e. it always returns [None]. *)
val nil: ('i, 'o) t

(** Use [err ?f ()] to compose parser that applies the input token stream to
    the optional function [f] to obtain an Objective Caml exception, then
    raises the exception.  The default function simply raises [Error].
*)
val err: ?f:('i Cf_seq.t -> exn) -> unit -> ('i, 'x) t

(** Use [req f p] to create a parser that requires the input stream to match
    the parser [p] or it will be passed to the parser [err f] instead.
*)
val req: ?f:('i Cf_seq.t -> exn) -> ('i, 'o) t -> ('i, 'o) t

(** A parser that produces the unit value when it recognizes the end of the
    input token sequence.
*)
val fin: ('i, unit) t

(** Use [alt plist] to create a parser that produces the output from the first
    parser in the list [plist] that recognizes a pattern in the input.  If no
    parser in the list recognizes a pattern, then the parser constructed by
    this function returns [None].
*)
val alt: ('i, 'o) t list -> ('i, 'o) t

(** Use [altz pseq] to create a parser that produces the output from the first
    parser in the lazy sequence [pseq] that recognizes a pattern in the input.
    If no parser in the sequence recognizes a pattern, then the parser
    constructed by this function returns [None].
*)
val altz: ('i, 'o) t Cf_seq.t -> ('i, 'o) t

(** Use [sat f] to create a parser that recognizes, shifts and reduces input
    tokens for which the satisfier function [f] returns [true].
*)
val sat: ('i -> bool) -> ('i, 'i) t

(** Use [tok f] to recognize and shift input tokens for which the tokenizer
    function [f] reduces an output value.
*)
val tok: ('i -> 'o option) -> ('i, 'o) t

(** Use [lit s obj] to obtain a parser on character input sequences that
    produces the output [obj] when it recognizes the literal [s] in the input.
*)
val lit: string -> 'o -> (char, 'o) t

(** Use [unfold p i] to create a sequence of output values recognized by
    applying the input token sequence [i] to the parser [p] until no more
    input is recognized.
*)
val unfold: ('i, 'o) t -> 'i Cf_seq.t -> 'o Cf_seq.t

(** A class useful for tracking the position in the input token stream that
    corresponds to the head of the sequence passed to a parser.  The [#cursor]
    class type is used in the [X] module defined below.
*)
class ['i] cursor:
    int ->  (** The initial position, i.e. usually zero *)
    object('self)
        val position_: int      (** The current position *)
        
        (** Use [c#advance i] to construct a new object corresponding to the
            new input position after reading an input symbol [i].
        *)
        method advance: 'i -> 'self
        
        (** Returns the current position. *)
        method position: int
    end

(** A module of parser extensions for working with input sequences that require
    position information in the parse function.
*)
module X: sig
    (** A parser where every token in the input sequence is accompanied by a
        {!Cf_parser.cursor} class object.
    *)
    type ('c, 'i, 'o) t = 'z Cf_seq.t -> ('o * 'z Cf_seq.t) option
        constraint 'z = 'i * 'c
        constraint 'c = 'x #cursor

    (** Generic parser error with one positional parameter. *)
    exception Error of int

    (** Use [err ?f ()] to compose parser that applies the input token stream
        to the optional function [f] to obtain an Objective Caml exception,
        then raises the exception.  The default function simply raises [Error].
    *)
    val err: ?f:(('i * 'c) Cf_seq.t -> exn) -> unit -> ('c, 'i, 'o) t

    (** Use [req ?f p] to create a parser that requires the input stream to
        match the parser [p] or it will be passed to the parser [err ?f ()]
        instead.
    *)
    val req: ?f:(('i * 'c) Cf_seq.t -> exn) -> ('c, 'i, 'o) t -> ('c, 'i, 'o) t

    (** Use [sat f] to create a parser that recognizes, shifts and reduces
        input tokens for which the satisfier function [f] returns [true].
    *)
    val sat: ('i -> bool) -> ('c, 'i, 'i) t

    (** Use [tok f] to recognize and shift input tokens for which the tokenizer
        function [f] reduces an output value.
    *)
    val tok: ('i -> 'o option) -> ('c, 'i, 'o) t

    (** Use [lit s obj] to obtain a parser on character input sequences that
        produces the output [obj] when it recognizes the literal [s] in the
        input.
    *)
    val lit: string -> 'o -> ('c, char, 'o) t

    (** Use [weave ~c i] with an initial cursor [c] and an input sequence [i]
        to create an input sequence with accompanying cursor.
    *)
    val weave: c:('i #cursor as 'c) -> 'i Cf_seq.t -> ('i * 'c) Cf_seq.t

    (** Use [unfold p i] to create a sequence of output values recognized by
        applying the input token sequence [i] to the parser [p] until no more
        input is recognized.  The cursor objects in the output sequence
        elements correspond to the positions of the input sequence at the start
        of where the output was recognized.
    *)
    val unfold: ('c, 'i, 'o) t -> ('i * 'c) Cf_seq.t -> ('o * 'c) Cf_seq.t
end

(** Open this module to take the parser operators into the current scope. *)
module Op: sig

    (** The binding operator.  Use [p >>= f] to compose a parser that passes
        output of parser [p] to the bound function [f] which returns the parser
        for the next symbol in a parsing rule.
    *)
    val ( >>= ): ('i, 'a) t -> ('a -> ('i, 'b) t) -> ('i, 'b) t
    
    (** The return operator.  Use [~:obj] to create a parser that produces the
        value [obj] as its result without processing any more input.
    *)
    val ( ~: ): 'o -> ('i, 'o) t
    
    (** The unit operator.  Use [?.token] to create a parser that recognizes
        [token] at the head of the input stream and produces it as its output.
    *)
    val ( ?. ): 'i -> ('i, 'i) t
    
    (** The unit operator with a cursor.  Use [?:token] to create a parser that
        recognizes [token] at the head of a position attributed input stream
        and produces it as its output.
    *)
    val ( ?: ): 'i -> ('c, 'i, 'i) X.t
    
    (** The option operator.  Use [?/p] to create a parser that recognizes an
        optional symbol in the input stream with the parser [p].  If the symbol
        is recognized, its tokens are shifted and reduced as [Some obj],
        otherwise no tokens are shifted and the reduced value is [None].
        Parser functions created with this operator {i always} return [Some r],
        where [r] is the reduced value, i.e. either [Some obj] or [None].
    *)
    val ( ?/ ): ('i, 'o) t -> ('i, 'o option) t
    
    (** The zero-or-more operator.  Use [?*p] to create a parser that
        recognizes zero or more symbols in the input stream with the parser
        [p].  The tokens of all the symbols recognized are shifted and reduced
        as a list of objects in the order of their appearance in the input
        stream.  Parser functions created with this operator {i always} return
        [Some r], where [r] is the reduced list of symbols, which may be the
        empty list if there are no symbols recognized.
    *)
    val ( ?* ): ('i, 'o) t -> ('i, 'o list) t
    
    (** The one-or-more operator.  Use [?+p] to create a parser that recognizes
        one or more symbols in the input stream with the parser [p].  If the
        symbols are recognized in the input stream, then their tokens are
        shifted and reduced into a list of objects in the order of their
        appearance in the input stream.  Otherwise, no tokens are shifted and
        no output is reduced.
    *)
    val ( ?+ ): ('i, 'o) t -> ('i, 'o * 'o list) t
    
    (** The serial composition operator.  Use [p1 %= p2] to unfold the output
        token stream of parser [p1] and use it as the input token stream for
        parser [p2].  This is useful in the case that [p1] is a lexical
        analyzer created with the {!Cf_lex} module, and [p2] is a grammar that
        operates at the level of lexical tokens output by [p1].
    *)
    val ( %= ): ('c, 'i, 'x) X.t -> ('c, 'x, 'o) X.t -> ('c, 'i, 'o) X.t
end

(** Use [filter f p] to produce a parser that applies [f] to each output symbol
    of [p] and ignores all those for which the result is [false].
*)
val filter: ('o -> bool) -> ('i, 'o) t -> ('i, 'o) t

(** Use [map f p] to produce a parser that transforms each output symbol of [p]
    by applying [f] to its value.
*)
val map: ('x -> 'y) -> ('i, 'x) t -> ('i, 'y) t

(** Use [optmap f p] to produce a parser that transforms each output symbol of
    [p] by applying [f] to its value and ignoring all those for which the
    result is [None].
*)
val optmap: ('x -> 'y option) -> ('i, 'x) t -> ('i, 'y) t

(** Use [to_extended p] to convert the parser [p] into an extended parser that
    ignores the position information woven into the input stream.
*)
val to_extended: ('i, 'o) t -> ('c, 'i, 'o) X.t

(** Use [of_extended c p] to convert the parser [p] that requires position
    information in the input stream into a parser that assumes the input begins
    at the position of the cursor [c].
*)
val of_extended: ('i #cursor as 'c) -> ('c, 'i, 'o) X.t -> ('i, 'o) t

(*--- End of File [ cf_parser.mli ] ---*)