1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325
|
(** This module provides fairly efficient non-backtracking parsers.
It is modelled on Angstrom's API, and you should use that if
backtracking is needed.
Example:
{[
let r = Buf_read.of_flow flow ~max_size:1_000_000 in
Buf_read.line r
]}
*)
open Std
type t
(** An input buffer. *)
exception Buffer_limit_exceeded
(** Raised if parsing an item would require enlarging the buffer beyond its configured limit. *)
type 'a parser = t -> 'a
(** An ['a parser] is a function that consumes and returns a value of type ['a].
@raise Failure The flow can't be parsed as a value of type ['a].
@raise End_of_file The flow ended without enough data to parse an ['a].
@raise Buffer_limit_exceeded Parsing the value would exceed the configured size limit. *)
val parse : ?initial_size:int -> max_size:int -> 'a parser -> _ Flow.source -> ('a, [> `Msg of string]) result
(** [parse p flow ~max_size] uses [p] to parse everything in [flow].
It is a convenience function that does
{[
let buf = of_flow flow ~max_size in
format_errors (p <* end_of_input) buf
]}
@param initial_size see {!of_flow}. *)
val parse_exn : ?initial_size:int -> max_size:int -> 'a parser -> _ Flow.source -> 'a
(** [parse_exn] wraps {!parse}, but raises [Failure msg] if that returns [Error (`Msg msg)].
Catching exceptions with [parse] and then raising them might seem pointless,
but this has the effect of turning e.g. an [End_of_file] exception into a [Failure]
with a more user-friendly message. *)
val parse_string : 'a parser -> string -> ('a, [> `Msg of string]) result
(** [parse_string p s] uses [p] to parse everything in [s].
It is defined as [format_errors (p <* end_of_input) (of_string s)] *)
val parse_string_exn : 'a parser -> string -> 'a
(** [parse_string_exn] is like {!parse_string}, but handles errors like {!parse_exn}. *)
val of_flow : ?initial_size:int -> max_size:int -> _ Flow.source -> t
(** [of_flow ~max_size flow] is a buffered reader backed by [flow].
@param initial_size The initial amount of memory to allocate for the buffer.
@param max_size The maximum size to which the buffer may grow.
This must be large enough to hold the largest single item
you want to parse (e.g. the longest line, if using
{!line}), plus any terminator needed to know the value is
complete (e.g. the newline character(s)). This is just to
prevent a run-away input from consuming all memory, and
you can usually just set it much larger than you expect
to need. *)
val of_buffer : Cstruct.buffer -> t
(** [of_buffer buf] is a reader that reads from [buf].
[buf] is used directly, without being copied.
[eof_seen (of_buffer buf) = true].
This module will not modify [buf] itself, but it will expose it via {!peek}. *)
val of_string : string -> t
(** [of_string s] is a reader that reads from [s]. *)
val as_flow : t -> Flow.source_ty r
(** [as_flow t] is a buffered flow.
Reading from it will return data from the buffer,
only reading the underlying flow if the buffer is empty. *)
(** {2 Reading data} *)
val line : string parser
(** [line] parses one line.
Lines can be terminated by either LF or CRLF.
The returned string does not include the terminator.
If [End_of_file] is reached after seeing some data but before seeing a line
terminator, the data seen is returned as the last line. *)
val lines : string Seq.t parser
(** [lines] returns a sequence that lazily reads the next line until the end of the input is reached.
[lines = seq line ~stop:at_end_of_input] *)
val char : char -> unit parser
(** [char c] checks that the next byte is [c] and consumes it.
@raise Failure if the next byte is not [c] *)
val any_char : char parser
(** [any_char] parses one character. *)
val peek_char : char option parser
(** [peek_char] returns [Some c] where [c] is the next character, but does not consume it.
Returns [None] at the end of the input stream rather than raising [End_of_file]. *)
val string : string -> unit parser
(** [string s] checks that [s] is the next string in the stream and consumes it.
@raise Failure if [s] is not a prefix of the stream. *)
val uint8 : int parser
(** [uint8] parses the next byte as an unsigned 8-bit integer. *)
(** Big endian parsers *)
module BE : sig
val uint16 : int parser
(** [uint16] parses the next 2 bytes as the lower 16 bits of an [int] in big-endian byte order *)
val uint32 : int32 parser
(** [uint32] parses the next 4 bytes as an [int32] in big-endian byte order *)
val uint48 : int64 parser
(** [uint48] parses the next 6 bytes as a 48-bit unsigned big-endian integer *)
val uint64 : int64 parser
(** [uint64] parses the next 8 bytes as an [int64] in big-endian byte order *)
val float : float parser
(** [float] parses the next 4 bytes as a [float] in big-endian byte order *)
val double : float parser
(** [double] parses the next 8 bytes as a [float] in big-endian byte order *)
end
(** Little endian parsers *)
module LE : sig
val uint16 : int parser
(** [uint16] parses the next 2 bytes as the lower 16 bits of an [int] in little-endian byte order *)
val uint32 : int32 parser
(** [uint32] parses the next 4 bytes as an [int32] in little-endian byte order *)
val uint48 : int64 parser
(** [uint48] parses the next 6 bytes as a 48-bit unsigned big-endian integer *)
val uint64 : int64 parser
(** [uint64] parses the next 8 bytes as an [int64] in little-endian byte order *)
val float : float parser
(** [float] parses the next 4 bytes as a [float] in little-endian byte order *)
val double : float parser
(** [double] parses the next 8 bytes as a [float] in little-endian byte order *)
end
val take : int -> string parser
(** [take n] takes exactly [n] bytes from the input. *)
val take_all : string parser
(** [take_all] takes all remaining data until end-of-file.
Returns [""] if already at end-of-file.
@raise Buffer_limit_exceeded if the remaining data exceeds or equals the buffer limit
(it needs one extra byte to confirm it has reached end-of-file). *)
val take_while : (char -> bool) -> string parser
(** [take_while p] finds the first byte for which [p] is false
and consumes and returns all bytes before that.
If [p] is true for all remaining bytes, it returns everything until end-of-file.
It will return the empty string if there are no matching characters
(and therefore never raises [End_of_file]). *)
val take_while1 : (char -> bool) -> string parser
(** [take_while1 p] is like [take_while]. However, the parser fails with "take_while1"
if at least one character of input hasn't been consumed by the parser. *)
val skip_while : (char -> bool) -> unit parser
(** [skip_while p] skips zero or more bytes for which [p] is [true].
[skip_while p t] does the same thing as [ignore (take_while p t)],
except that it is not limited by the buffer size. *)
val skip_while1 : (char -> bool) -> unit parser
(** [skip_while1 p] is like [skip_while]. However, the parser fails with "skip_while1" if
at least one character of input hasn't been skipped. *)
val skip : int -> unit parser
(** [skip n] discards the next [n] bytes.
[skip n] = [map ignore (take n)],
except that the number of skipped bytes may be larger than the buffer (it will not grow).
Note: if [End_of_file] is raised, all bytes in the stream will have been consumed. *)
val at_end_of_input : bool parser
(** [at_end_of_input] returns [true] when at the end of the stream, or
[false] if there is at least one more byte to be read. *)
val end_of_input : unit parser
(** [end_of_input] checks that there are no further bytes in the stream.
@raise Failure if there are further bytes *)
(** {2 Combinators} *)
val seq : ?stop:bool parser -> 'a parser -> 'a Seq.t parser
(** [seq p] is a sequence that uses [p] to get the next item.
A sequence node can only be used while the stream is at
the expected position, and will raise [Invalid_argument]
if any bytes have been consumed in the meantime. This
also means that each node can only be used once; use
{!Seq.memoize} to make the sequence persistent.
It is not necessary to consume all the elements of the
sequence.
Example ([head 4] is a parser that takes 4 lines):
{[
let head n r =
r |> Buf_read.(seq line) |> Seq.take n |> List.of_seq
]}
@param stop This is used before parsing each item.
The sequence ends if this returns [true].
The default is {!at_end_of_input}. *)
val pair : 'a parser -> 'b parser -> ('a * 'b) parser
(** [pair a b] is a parser that first uses [a] to parse a value [x],
then uses [b] to parse a value [y], then returns [(x, y)].
Note that this module does not support backtracking, so if [b] fails
then the bytes consumed by [a] are lost. *)
val return : 'a -> 'a parser
(** [return x] is a parser that consumes nothing and always returns [x].
[return] is just [Fun.const]. *)
val map : ('a -> 'b) -> ('a parser -> 'b parser)
(** [map f a] is a parser that parses the stream with [a] to get [v],
and then returns [f v]. *)
val bind : 'a parser -> ('a -> 'b parser) -> 'b parser
(** [bind a f] is a parser that first uses [a] to parse a value [v],
then uses [f v] to select the next parser, and then uses that. *)
val format_errors : 'a parser -> ('a, [> `Msg of string]) result parser
(** [format_errors p] catches [Failure], [End_of_file] and
[Buffer_limit_exceeded] exceptions and returns them as a formatted error message. *)
(** Convenient syntax for some of the combinators. *)
module Syntax : sig
val ( let+ ) : 'a parser -> ('a -> 'b) -> 'b parser
(** Syntax for {!map}. *)
val ( let* ) : 'a parser -> ('a -> 'b parser) -> 'b parser
(** Syntax for {!bind} *)
val ( and+ ) : 'a parser -> 'b parser -> ('a * 'b) parser
(** Syntax for {!pair} *)
val ( and* ) : 'a parser -> 'b parser -> ('a * 'b) parser
(** Syntax for {!pair} (same as [and+]). *)
val ( <*> ) : 'a parser -> 'b parser -> ('a * 'b) parser
(** [a <*> b] is [pair a b]. *)
val ( <* ) : 'a parser -> 'b parser -> 'a parser
(** [a <* b] is [map fst (pair a b)].
It parses two things and keeps only the first. *)
val ( *> ) : 'a parser -> 'b parser -> 'b parser
(** [a *> b] is [map snd (pair a b)].
It parses two things and keeps only the second. *)
end
(** {2 Low-level API} *)
val buffered_bytes : t -> int
(** [buffered_bytes t] is the number of bytes that can be read without
reading from the underlying flow. *)
val peek : t -> Cstruct.t
(** [peek t] returns a view onto the active part of [t]'s internal buffer.
Performing any operation that might add to the buffer may invalidate this,
so it should be used immediately and then forgotten.
[Cstruct.length (peek t) = buffered_bytes t]. *)
val ensure : t -> int -> unit
(** [ensure t n] ensures that the buffer contains at least [n] bytes of data.
If not, it reads from the flow until there is.
[buffered_bytes (ensure t n) >= n].
@raise End_of_file if the flow ended before [n] bytes were available
@raise Buffer_limit_exceeded if [n] exceeds the buffer's maximum size *)
val consume : t -> int -> unit
(** [consume t n] discards the first [n] bytes from [t]'s buffer.
Use this after {!peek} to mark some bytes as consumed.
[buffered_bytes t' = buffered_bytes t - n]
Note: unlike {!skip}, this will not read data from the underlying flow. *)
val consumed_bytes : t -> int
(** [consumed_bytes t] is the total number of bytes consumed.
i.e. it is the offset into the stream of the next byte to be parsed. *)
val eof_seen : t -> bool
(** [eof_seen t] indicates whether we've received [End_of_file] from the underlying flow.
If so, there will never be any further data beyond what [peek] already returns.
Note that this returns [false] if we're at the end of the stream but don't know it yet.
Use {!at_end_of_input} to be sure. *)
|