File: buf_read.mli

package info (click to toggle)
ocaml-eio 1.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 2,548 kB
  • sloc: ml: 14,608; ansic: 1,237; makefile: 25
file content (325 lines) | stat: -rw-r--r-- 12,228 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
(** This module provides fairly efficient non-backtracking parsers.
    It is modelled on Angstrom's API, and you should use that if
    backtracking is needed.

    Example:
    {[
      let r = Buf_read.of_flow flow ~max_size:1_000_000 in
      Buf_read.line r
    ]}
*)

open Std

type t
(** An input buffer. *)

exception Buffer_limit_exceeded
(** Raised if parsing an item would require enlarging the buffer beyond its configured limit. *)

type 'a parser = t -> 'a
(** An ['a parser] is a function that consumes and returns a value of type ['a].
    @raise Failure The flow can't be parsed as a value of type ['a].
    @raise End_of_file The flow ended without enough data to parse an ['a].
    @raise Buffer_limit_exceeded Parsing the value would exceed the configured size limit. *)

val parse : ?initial_size:int -> max_size:int -> 'a parser -> _ Flow.source -> ('a, [> `Msg of string]) result
(** [parse p flow ~max_size] uses [p] to parse everything in [flow].

    It is a convenience function that does
    {[
      let buf = of_flow flow ~max_size in
      format_errors (p <* end_of_input) buf
    ]}

    @param initial_size see {!of_flow}. *)

val parse_exn : ?initial_size:int -> max_size:int -> 'a parser -> _ Flow.source -> 'a
(** [parse_exn] wraps {!parse}, but raises [Failure msg] if that returns [Error (`Msg msg)].

    Catching exceptions with [parse] and then raising them might seem pointless,
    but this has the effect of turning e.g. an [End_of_file] exception into a [Failure]
    with a more user-friendly message. *)

val parse_string : 'a parser -> string -> ('a, [> `Msg of string]) result
(** [parse_string p s] uses [p] to parse everything in [s].
    It is defined as [format_errors (p <* end_of_input) (of_string s)] *)

val parse_string_exn : 'a parser -> string -> 'a
(** [parse_string_exn] is like {!parse_string}, but handles errors like {!parse_exn}. *)

val of_flow : ?initial_size:int -> max_size:int -> _ Flow.source -> t
(** [of_flow ~max_size flow] is a buffered reader backed by [flow].

    @param initial_size The initial amount of memory to allocate for the buffer.
    @param max_size The maximum size to which the buffer may grow.
                    This must be large enough to hold the largest single item
                    you want to parse (e.g. the longest line, if using
                    {!line}), plus any terminator needed to know the value is
                    complete (e.g. the newline character(s)). This is just to
                    prevent a run-away input from consuming all memory, and
                    you can usually just set it much larger than you expect
                    to need. *)

val of_buffer : Cstruct.buffer -> t
(** [of_buffer buf] is a reader that reads from [buf].
    [buf] is used directly, without being copied.
    [eof_seen (of_buffer buf) = true].
    This module will not modify [buf] itself, but it will expose it via {!peek}. *)

val of_string : string -> t
(** [of_string s] is a reader that reads from [s]. *)

val as_flow : t -> Flow.source_ty r
(** [as_flow t] is a buffered flow.

    Reading from it will return data from the buffer,
    only reading the underlying flow if the buffer is empty. *)

(** {2 Reading data} *)

val line : string parser
(** [line] parses one line.

    Lines can be terminated by either LF or CRLF.
    The returned string does not include the terminator.

    If [End_of_file] is reached after seeing some data but before seeing a line
    terminator, the data seen is returned as the last line. *)

val lines : string Seq.t parser
(** [lines] returns a sequence that lazily reads the next line until the end of the input is reached.

    [lines = seq line ~stop:at_end_of_input] *)

val char : char -> unit parser
(** [char c] checks that the next byte is [c] and consumes it.
    @raise Failure if the next byte is not [c] *)

val any_char : char parser
(** [any_char] parses one character. *)

val peek_char : char option parser
(** [peek_char] returns [Some c] where [c] is the next character, but does not consume it.

    Returns [None] at the end of the input stream rather than raising [End_of_file]. *)

val string : string -> unit parser
(** [string s] checks that [s] is the next string in the stream and consumes it.

    @raise Failure if [s] is not a prefix of the stream. *)

val uint8 : int parser
(** [uint8] parses the next byte as an unsigned 8-bit integer. *)

(** Big endian parsers *)
module BE : sig
  val uint16 : int parser
  (** [uint16] parses the next 2 bytes as the lower 16 bits of an [int] in big-endian byte order *)

  val uint32 : int32 parser
  (** [uint32] parses the next 4 bytes as an [int32] in big-endian byte order *)

  val uint48 : int64 parser
  (** [uint48] parses the next 6 bytes as a 48-bit unsigned big-endian integer *)

  val uint64 : int64 parser
  (** [uint64] parses the next 8 bytes as an [int64] in big-endian byte order *)

  val float : float parser
  (** [float] parses the next 4 bytes as a [float] in big-endian byte order *)

  val double : float parser
  (** [double] parses the next 8 bytes as a [float] in big-endian byte order *)
end

(** Little endian parsers *)
module LE : sig
  val uint16 : int parser
  (** [uint16] parses the next 2 bytes as the lower 16 bits of an [int] in little-endian byte order *)

  val uint32 : int32 parser
  (** [uint32] parses the next 4 bytes as an [int32] in little-endian byte order *)

  val uint48 : int64 parser
  (** [uint48] parses the next 6 bytes as a 48-bit unsigned big-endian integer *)

  val uint64 : int64 parser
  (** [uint64] parses the next 8 bytes as an [int64] in little-endian byte order *)

  val float : float parser
  (** [float] parses the next 4 bytes as a [float] in little-endian byte order *)

  val double : float parser
  (** [double] parses the next 8 bytes as a [float] in little-endian byte order *)
end

val take : int -> string parser
(** [take n] takes exactly [n] bytes from the input. *)

val take_all : string parser
(** [take_all] takes all remaining data until end-of-file.

    Returns [""] if already at end-of-file.

    @raise Buffer_limit_exceeded if the remaining data exceeds or equals the buffer limit
           (it needs one extra byte to confirm it has reached end-of-file). *)

val take_while : (char -> bool) -> string parser
(** [take_while p] finds the first byte for which [p] is false
    and consumes and returns all bytes before that.

    If [p] is true for all remaining bytes, it returns everything until end-of-file.

    It will return the empty string if there are no matching characters
    (and therefore never raises [End_of_file]). *)

val take_while1 : (char -> bool) -> string parser
(** [take_while1 p] is like [take_while]. However, the parser fails with "take_while1"
    if at least one character of input hasn't been consumed by the parser. *)

val skip_while : (char -> bool) -> unit parser
(** [skip_while p] skips zero or more bytes for which [p] is [true].

    [skip_while p t] does the same thing as [ignore (take_while p t)],
    except that it is not limited by the buffer size. *)

val skip_while1 : (char -> bool) -> unit parser
(** [skip_while1 p] is like [skip_while]. However, the parser fails with "skip_while1" if
    at least one character of input hasn't been skipped. *)

val skip : int -> unit parser
(** [skip n] discards the next [n] bytes.

    [skip n] = [map ignore (take n)],
    except that the number of skipped bytes may be larger than the buffer (it will not grow).

    Note: if [End_of_file] is raised, all bytes in the stream will have been consumed. *)

val at_end_of_input : bool parser
(** [at_end_of_input] returns [true] when at the end of the stream, or
    [false] if there is at least one more byte to be read. *)

val end_of_input : unit parser
(** [end_of_input] checks that there are no further bytes in the stream.
    @raise Failure if there are further bytes *)

(** {2 Combinators} *)

val seq : ?stop:bool parser -> 'a parser -> 'a Seq.t parser
(** [seq p] is a sequence that uses [p] to get the next item.

    A sequence node can only be used while the stream is at
    the expected position, and will raise [Invalid_argument]
    if any bytes have been consumed in the meantime. This
    also means that each node can only be used once; use
    {!Seq.memoize} to make the sequence persistent.

    It is not necessary to consume all the elements of the
    sequence.

    Example ([head 4] is a parser that takes 4 lines):

    {[
      let head n r =
        r |> Buf_read.(seq line) |> Seq.take n |> List.of_seq
    ]}

    @param stop This is used before parsing each item.
                The sequence ends if this returns [true].
                The default is {!at_end_of_input}. *)

val pair : 'a parser -> 'b parser -> ('a * 'b) parser
(** [pair a b] is a parser that first uses [a] to parse a value [x],
    then uses [b] to parse a value [y], then returns [(x, y)].

    Note that this module does not support backtracking, so if [b] fails
    then the bytes consumed by [a] are lost. *)

val return : 'a -> 'a parser
(** [return x] is a parser that consumes nothing and always returns [x].
    [return] is just [Fun.const]. *)

val map : ('a -> 'b) -> ('a parser -> 'b parser)
(** [map f a] is a parser that parses the stream with [a] to get [v],
    and then returns [f v]. *)

val bind : 'a parser -> ('a -> 'b parser) -> 'b parser
(** [bind a f] is a parser that first uses [a] to parse a value [v],
    then uses [f v] to select the next parser, and then uses that. *)

val format_errors : 'a parser -> ('a, [> `Msg of string]) result parser
(** [format_errors p] catches [Failure], [End_of_file] and
    [Buffer_limit_exceeded] exceptions and returns them as a formatted error message. *)

(** Convenient syntax for some of the combinators. *)
module Syntax : sig
  val ( let+ ) : 'a parser -> ('a -> 'b) -> 'b parser
  (** Syntax for {!map}. *)

  val ( let* ) : 'a parser -> ('a -> 'b parser) -> 'b parser
  (** Syntax for {!bind} *)

  val ( and+ ) : 'a parser -> 'b parser -> ('a * 'b) parser
  (** Syntax for {!pair} *)

  val ( and* ) : 'a parser -> 'b parser -> ('a * 'b) parser
  (** Syntax for {!pair} (same as [and+]). *)

  val ( <*> ) : 'a parser -> 'b parser -> ('a * 'b) parser
  (** [a <*> b] is [pair a b]. *)

  val ( <* ) : 'a parser -> 'b parser -> 'a parser
  (** [a <* b] is [map fst (pair a b)].
      It parses two things and keeps only the first. *)

  val ( *> ) : 'a parser -> 'b parser -> 'b parser
  (** [a *> b] is [map snd (pair a b)].
      It parses two things and keeps only the second. *)
end

(** {2 Low-level API} *)

val buffered_bytes : t -> int
(** [buffered_bytes t] is the number of bytes that can be read without
    reading from the underlying flow. *)

val peek : t -> Cstruct.t
(** [peek t] returns a view onto the active part of [t]'s internal buffer.

    Performing any operation that might add to the buffer may invalidate this,
    so it should be used immediately and then forgotten.

    [Cstruct.length (peek t) = buffered_bytes t]. *)

val ensure : t -> int -> unit
(** [ensure t n] ensures that the buffer contains at least [n] bytes of data.

    If not, it reads from the flow until there is.

    [buffered_bytes (ensure t n) >= n].

    @raise End_of_file if the flow ended before [n] bytes were available
    @raise Buffer_limit_exceeded if [n] exceeds the buffer's maximum size *)

val consume : t -> int -> unit
(** [consume t n] discards the first [n] bytes from [t]'s buffer.

    Use this after {!peek} to mark some bytes as consumed.

    [buffered_bytes t' = buffered_bytes t - n]

    Note: unlike {!skip}, this will not read data from the underlying flow. *)

val consumed_bytes : t -> int
(** [consumed_bytes t] is the total number of bytes consumed.

    i.e. it is the offset into the stream of the next byte to be parsed. *)

val eof_seen : t -> bool
(** [eof_seen t] indicates whether we've received [End_of_file] from the underlying flow.

    If so, there will never be any further data beyond what [peek] already returns.

    Note that this returns [false] if we're at the end of the stream but don't know it yet.
    Use {!at_end_of_input} to be sure. *)