File: Parsers.jl

package info (click to toggle)
julia 1.5.3%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 91,132 kB
  • sloc: lisp: 278,486; ansic: 60,186; cpp: 29,801; sh: 2,403; makefile: 1,998; pascal: 1,313; objc: 647; javascript: 516; asm: 226; python: 161; xml: 34
file content (593 lines) | stat: -rw-r--r-- 26,147 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
module Parsers

using Dates

include("utils.jl")

"""
    `Parsers.Options` is a structure for holding various parsing settings when calling `Parsers.parse`, `Parsers.tryparse`, and `Parsers.xparse`. They include:

  * `sentinel=nothing`: valid values include: `nothing` meaning don't check for sentinel values; `missing` meaning an "empty field" should be considered a sentinel value; or a `Vector{String}` of the various string values that should each be checked as a sentinel value. Note that sentinels will always be checked longest to shortest, with the longest valid match taking precedence.
  * `wh1=' '`: the first ascii character to be considered when ignoring leading/trailing whitespace in value parsing
  * `wh2='\t'`: the second ascii character to be considered when ignoring leading/trailing whitespace in value parsing
  * `openquotechar='"'`: the ascii character that signals a "quoted" field while parsing; subsequent characters will be treated as non-significant until a valid `closequotechar` is detected
  * `closequotechar='"'`: the ascii character that signals the end of a quoted field
  * `escapechar='"'`: an ascii character used to "escape" a `closequotechar` within a quoted field
  * `delim=nothing`: if `nothing`, no delimiter will be checked for; if a `Char` or `String`, a delimiter will be checked for directly after parsing a value or `closequotechar`; a newline (`\n`), return (`\r`), or CRLF (`"\r\n"`) are always considered "delimiters", in addition to EOF
  * `decimal='.'`: an ascii character to be used when parsing float values that separates a decimal value
  * `trues=nothing`: if `nothing`, `Bool` parsing will only check for the string `true` or an `Integer` value of `1` as valid values for `true`; as a `Vector{String}`, each string value will be checked to indicate a valid `true` value
  * `falses=nothing`: if `nothing`, `Bool` parsing will only check for the string `false` or an `Integer` value of `0` as valid values for `false`; as a `Vector{String}`, each string value will be checked to indicate a valid `false` value
  * `dateformat=nothing`: if `nothing`, `Date`, `DateTime`, and `Time` parsing will use a default `Dates.DateFormat` object while parsing; a `String` or `Dates.DateFormat` object can be provided for custom format parsing
  * `ignorerepeated=false`: if `true`, consecutive delimiter characters or strings will be consumed until a non-delimiter is encountered; if `false`, only a single delimiter character/string will be consumed. Useful for fixed-width delimited files where fields are padded with delimiters
  * `quoted=false`: whether parsing should check for `openquotechar` and `closequotechar` characters to signal quoted fields
  * `debug=false`: if `true`, various debug logging statements will be printed while parsing; useful when diagnosing why parsing returns certain `Parsers.ReturnCode` values
"""
struct Options{ignorerepeated, Q, debug, S, D, DF}
    sentinel::S # Union{Nothing, Missing, Vector{Tuple{Ptr{UInt8}, Int}}}
    wh1::UInt8
    wh2::UInt8
    oq::UInt8
    cq::UInt8
    e::UInt8
    delim::D # Union{Nothing, UInt8, Tuple{Ptr{UInt8}, Int}}
    decimal::UInt8
    trues::Union{Nothing, Vector{Tuple{Ptr{UInt8}, Int}}}
    falses::Union{Nothing, Vector{Tuple{Ptr{UInt8}, Int}}}
    dateformat::DF # Union{Nothing, Dates.DateFormat}
    strict::Bool
    silencewarnings::Bool
end

# Convert each string with `ptrlen` and order the results by their second element,
# largest first, so that longer candidates are tried before shorter ones.
function prepare(x::Vector{String})
    converted = map(ptrlen, x)
    return sort!(converted, by=entry -> entry[2], rev=true)
end
# `true` when the character/byte is within the 7-bit ASCII range.
function asciival(c::Char)
    return isascii(c)
end
function asciival(b::UInt8)
    return b <= 0x7f
end

# Positional constructor for `Options`.
#
# Validates the raw arguments, converts them to the byte-level representations
# stored in the struct (`UInt8` for single characters, `ptrlen`/`prepare` results
# for strings), and lifts `ignorerepeated`, `quoted`, and `debug` into type parameters.
#
# Throws `ArgumentError` when:
#   * a whitespace, quote, or escape character is not ASCII
#   * a single-character `delim` equals a whitespace, quote, or escape character
#   * a sentinel string starts with a whitespace character, a quote character,
#     or the delimiter
function Options(
            sentinel::Union{Nothing, Missing, Vector{String}}, 
            wh1::Union{UInt8, Char},
            wh2::Union{UInt8, Char},
            oq::Union{UInt8, Char},
            cq::Union{UInt8, Char},
            e::Union{UInt8, Char},
            delim::Union{Nothing, UInt8, Char, String},
            decimal::Union{UInt8, Char},
            trues::Union{Nothing, Vector{String}},
            falses::Union{Nothing, Vector{String}},
            dateformat::Union{Nothing, String, Dates.DateFormat},
            ignorerepeated, quoted, debug, strict=false, silencewarnings=false)
    asciival(wh1) && asciival(wh2) || throw(ArgumentError("whitespace characters must be ASCII"))
    asciival(oq) && asciival(cq) && asciival(e) || throw(ArgumentError("openquotechar, closequotechar, and escapechar must be ASCII characters"))
    if delim isa UInt8 || delim isa Char
        # Compare on the byte level: in Julia `UInt8(' ') == ' '` is `false`, so comparing
        # the raw arguments would miss a `Char` delim clashing with a `UInt8` setting.
        # The comparisons are grouped explicitly because `&&` binds tighter than `||`;
        # written as `a || b && throw(...)` only the last comparison could ever throw.
        delimbyte = delim % UInt8
        if (wh1 % UInt8) == delimbyte || (wh2 % UInt8) == delimbyte
            throw(ArgumentError("whitespace characters must be different than delim argument"))
        end
        if (oq % UInt8) == delimbyte || (cq % UInt8) == delimbyte || (e % UInt8) == delimbyte
            throw(ArgumentError("delim argument must be different than openquotechar, closequotechar, and escapechar arguments"))
        end
    end
    if sentinel isa Vector{String}
        for s in sentinel
            if startswith(s, string(Char(wh1))) || startswith(s, string(Char(wh2)))
                throw(ArgumentError("sentinel value isn't allowed to start with wh1 or wh2 characters"))
            end
            # NOTE: only the open/close quote characters are checked here, even though the
            # message also mentions escapechar
            if startswith(s, string(Char(oq))) || startswith(s, string(Char(cq)))
                throw(ArgumentError("sentinel value isn't allowed to start with openquotechar, closequotechar, or escapechar characters"))
            end
            if (delim isa UInt8 || delim isa Char) && startswith(s, string(Char(delim)))
                throw(ArgumentError("sentinel value isn't allowed to start with a delimiter character"))
            elseif delim isa String && startswith(s, delim)
                throw(ArgumentError("sentinel value isn't allowed to start with a delimiter string"))
            end
        end
    end
    # normalize everything to the representations stored in the struct
    sent = sentinel === nothing || sentinel === missing ? sentinel : prepare(sentinel)
    del = delim === nothing ? nothing : delim isa String ? ptrlen(delim) : delim % UInt8
    trs = trues === nothing ? nothing : prepare(trues)
    fls = falses === nothing ? nothing : prepare(falses)
    df = dateformat === nothing ? nothing : dateformat isa String ? Dates.DateFormat(dateformat) : dateformat
    return Options{ignorerepeated, quoted, debug, typeof(sent), typeof(del), typeof(df)}(sent, wh1 % UInt8, wh2 % UInt8, oq % UInt8, cq % UInt8, e % UInt8, del, decimal % UInt8, trs, fls, df, strict, silencewarnings)
end

# Keyword-argument constructor: every setting has a default and is forwarded, in order,
# to the positional constructor.
function Options(;
        sentinel::Union{Nothing, Missing, Vector{String}}=nothing,
        wh1::Union{UInt8, Char}=UInt8(' '),
        wh2::Union{UInt8, Char}=UInt8('\t'),
        openquotechar::Union{UInt8, Char}=UInt8('"'),
        closequotechar::Union{UInt8, Char}=UInt8('"'),
        escapechar::Union{UInt8, Char}=UInt8('"'),
        delim::Union{Nothing, UInt8, Char, String}=nothing,
        decimal::Union{UInt8, Char}=UInt8('.'),
        trues::Union{Nothing, Vector{String}}=nothing,
        falses::Union{Nothing, Vector{String}}=nothing,
        dateformat::Union{Nothing, String, Dates.DateFormat}=nothing,
        ignorerepeated::Bool=false,
        quoted::Bool=false,
        debug::Bool=false,
    )
    return Options(
        sentinel, wh1, wh2,
        openquotechar, closequotechar, escapechar,
        delim, decimal, trues, falses, dateformat,
        ignorerepeated, quoted, debug,
    )
end

# Default options for `parse`/`tryparse`: all keyword defaults (no sentinel, no delimiter,
# no quote detection).
const OPTIONS = Options()
# Default options for the positional `xparse` methods: an empty field is a sentinel
# (`missing`), fields are comma-delimited, and quoted fields are detected.
const XOPTIONS = Options(sentinel=missing, delim=UInt8(','), quoted=true)

# high-level convenience functions like in Base
"Attempt to parse a value of type `T` from string `buf`. Throws `Parsers.Error` on parser failures and invalid values."
function parse(::Type{T}, buf::Union{AbstractVector{UInt8}, AbstractString, IO}, options=OPTIONS; pos::Integer=1, len::Integer=buf isa IO ? 0 : sizeof(buf)) where {T}
    x, code, vpos, vlen, tlen = xparse(T, buf isa AbstractString ? codeunits(buf) : buf, pos, len, options)
    return ok(code) ? x : throw(Error(buf, T, code, pos, tlen))
end

"Attempt to parse a value of type `T` from `buf`. Returns `nothing` on parser failures and invalid values."
function tryparse(::Type{T}, buf::Union{AbstractVector{UInt8}, AbstractString, IO}, options=OPTIONS; pos::Integer=1, len::Integer=buf isa IO ? 0 : sizeof(buf)) where {T}
    x, code, vpos, vlen, tlen = xparse(T, buf isa AbstractString ? codeunits(buf) : buf, pos, len, options)
    return ok(code) ? x : nothing
end

# Placeholder value of type `T`; `xparse` uses it as the initial value of its result
# before any type-specific parsing happens.
function default(::Type{T}) where {T <: Integer}
    return zero(T)
end
function default(::Type{T}) where {T <: AbstractFloat}
    return T(0.0)
end
function default(::Type{T}) where {T <: Dates.TimeType}
    return T(0)
end

# NOTE: the keyword-argument `xparse` method defined below is for testing purposes only; it's much too slow to dynamically create `Options` for every xparse call
"""
    Parsers.xparse(T, buf, pos, len, options) => (x, code, startpos, value_len, total_len)

    The core parsing function for any type `T`. Takes a `buf`, which can be a `Vector{UInt8}`, `Base.CodeUnits`,
    or an `IO`. `pos` is the byte position to begin parsing at. `len` is the total # of bytes in `buf` (signaling eof).
    `options` is an instance of `Parsers.Options`.

    `Parsers.xparse` returns a tuple of 5 values:
      * `x` is a value of type `T`, even if parsing does not succeed
      * `code` is a bitmask of parsing codes, use `Parsers.codes(code)` or `Parsers.text(code)` to see the various bit values set. See `?Parsers.ReturnCode` for additional details on the various parsing codes
      * `startpos`: the starting byte position of the value being parsed; will always equal the start `pos` passed in, except for quoted field where it will point instead to the first byte after the open quote character
      * `value_len`: the # of bytes consumed while parsing a value, will be equal to the total number of bytes consumed, except for quoted or delimited fields where the quote and delimiter characters will be subtracted out
      * `total_len`: the total # of bytes consumed while parsing a value, including any quote or delimiter characters; this can be added to the starting `pos` to allow calling `Parsers.xparse` again for a subsequent field/value
"""
function xparse end

# Keyword-argument convenience method: builds a fresh `Options` from the keyword
# arguments on every call, then forwards to the positional `xparse` method.
function xparse(
        ::Type{T},
        buf::Union{AbstractVector{UInt8}, AbstractString, IO};
        pos::Integer=1,
        len::Integer=buf isa IO ? 0 : sizeof(buf),
        sentinel=nothing,
        wh1::Union{UInt8, Char}=UInt8(' '),
        wh2::Union{UInt8, Char}=UInt8('\t'),
        quoted::Bool=true,
        openquotechar::Union{UInt8, Char}=UInt8('"'),
        closequotechar::Union{UInt8, Char}=UInt8('"'),
        escapechar::Union{UInt8, Char}=UInt8('"'),
        ignorerepeated::Bool=false,
        delim::Union{UInt8, Char, Tuple{Ptr{UInt8}, Int}, AbstractString, Nothing}=UInt8(','),
        decimal::Union{UInt8, Char}=UInt8('.'),
        trues=nothing,
        falses=nothing,
        dateformat::Union{Nothing, String, Dates.DateFormat}=nothing,
        debug::Bool=false,
    ) where {T}
    opts = Options(
        sentinel, wh1, wh2,
        openquotechar, closequotechar, escapechar,
        delim, decimal, trues, falses, dateformat,
        ignorerepeated, quoted, debug,
    )
    # strings are parsed through their code units
    if buf isa AbstractString
        return xparse(T, codeunits(buf), pos, len, opts)
    end
    return xparse(T, buf, pos, len, opts)
end

# String input: parse the string's code units with the same arguments.
xparse(::Type{T}, buf::AbstractString, pos, len, options::Options=XOPTIONS) where {T} =
    xparse(T, codeunits(buf), pos, len, options)

# Core `xparse` method for byte vectors and `IO` sources.
#
# Parses one field starting at `pos` and proceeds through these phases:
#   1. strip leading whitespace (`options.wh1` / `options.wh2`)
#   2. if `Q` is true, detect an open quote character and strip whitespace after it
#   3. check for sentinel values, then call `typeparser` for the type-specific parsing
#   4. strip trailing whitespace, then (for quoted fields) locate the close quote character
#   5. if `options.delim` is set, consume a delimiter or newline, scanning forward
#      (and flagging `INVALID_DELIMITER`) when neither immediately follows the value
#
# Position bookkeeping:
#   * `startpos`  - the `pos` passed in
#   * `vstartpos` - start of the value (moved past the open quote for quoted fields)
#   * `vpos`      - end of the value
#   * `pos`       - current position; `pos - startpos` is the total length consumed
#
# Returns `(x, code, vstartpos, vpos - vstartpos, pos - startpos)`, with the three
# positions/lengths converted to `Int64`. Every exit path jumps to the `donedone` label.
@inline function xparse(::Type{T}, source::Union{AbstractVector{UInt8}, IO}, pos, len, options::Options{ignorerepeated, Q, debug, S, D, DF}=XOPTIONS) where {T, ignorerepeated, Q, debug, S, D, DF}
    startpos = vstartpos = vpos = pos
    sentinel = options.sentinel
    code = SUCCESS
    # placeholder result, returned as-is on the early-exit paths below
    x = default(T)
    quoted = false
    sentinelpos = 0
    if debug
        println("parsing $T, pos=$pos, len=$len")
    end
    if eof(source, pos, len)
        # nothing to parse at all: a sentinel if empty fields are sentinels, invalid otherwise
        code = (sentinel === missing ? SENTINEL : INVALID) | EOF
        @goto donedone
    end
    b = peekbyte(source, pos)
    if debug
        println("1) parsed: '$(escape_string(string(Char(b))))'")
    end
    # strip leading whitespace
    while b == options.wh1 || b == options.wh2
        if debug
            println("stripping leading whitespace")
        end
        pos += 1
        incr!(source)
        if eof(source, pos, len)
            code = INVALID | EOF
            @goto donedone
        end
        b = peekbyte(source, pos)
        if debug
            println("2) parsed: '$(escape_string(string(Char(b))))'")
        end
    end
    # check for start of quoted field
    if Q
        quoted = b == options.oq
        if quoted
            if debug
                println("detected open quote character")
            end
            code = QUOTED
            pos += 1
            # the value starts after the open quote character
            vstartpos = pos
            incr!(source)
            if eof(source, pos, len)
                code |= INVALID_QUOTED_FIELD
                @goto donedone
            end
            b = peekbyte(source, pos)
            if debug
                println("3) parsed: '$(escape_string(string(Char(b))))'")
            end
            # ignore whitespace within quoted field
            while b == options.wh1 || b == options.wh2
                if debug
                    println("stripping whitespace within quoted field")
                end
                pos += 1
                incr!(source)
                if eof(source, pos, len)
                    code |= INVALID_QUOTED_FIELD | EOF
                    @goto donedone
                end
                b = peekbyte(source, pos)
                if debug
                    println("4) parsed: '$(escape_string(string(Char(b))))'")
                end
            end
        end
    end
    # check for sentinel values if applicable
    if sentinel !== nothing && sentinel !== missing
        if debug
            println("checking for sentinel value")
        end
        sentinelpos = checksentinel(source, pos, len, sentinel, debug)
    end
    # type-specific parsing; returns the value, the updated code, and the position after the value
    x, code, pos = typeparser(T, source, pos, len, b, code, options)
    if sentinel !== nothing && sentinel !== missing && sentinelpos >= pos
        # if we matched a sentinel value that was as long or longer than our type value
        code &= ~(OK | INVALID | OVERFLOW)
        pos = sentinelpos
        fastseek!(source, pos - 1)
        code |= SENTINEL
        if eof(source, pos, len)
            code |= EOF
        end
    elseif sentinel === missing && pos == vstartpos
        # no bytes were consumed for the value: an "empty field" counts as a sentinel
        code &= ~(OK | INVALID)
        code |= SENTINEL
    end
    vpos = pos
    if (code & EOF) == EOF
        if quoted
            # if we detected a quote character, it's an invalid quoted field due to eof in the middle
            code |= INVALID_QUOTED_FIELD
        end
        @goto donedone
    end

# NOTE(review): no `@goto donevalue` is visible in this method; the label is reached by falling through
@label donevalue
    b = peekbyte(source, pos)
    if debug
        println("finished $T value parsing: pos=$pos, current character: '$(escape_string(string(Char(b))))'")
    end
    # donevalue means we finished parsing a value or sentinel, but didn't reach len, b is still the current byte
    # strip trailing whitespace
    while b == options.wh1 || b == options.wh2
        if debug
            println("stripping trailing whitespace")
        end
        pos += 1
        vpos += 1
        incr!(source)
        if eof(source, pos, len)
            code |= EOF
            if quoted
                code |= INVALID_QUOTED_FIELD
            end
            @goto donedone
        end
        b = peekbyte(source, pos)
        if debug
            println("8) parsed: '$(escape_string(string(Char(b))))'")
        end
    end
    if Q
        # for quoted fields, find the closing quote character
        # we should be positioned at the correct place to find the closing quote character if everything is as it should be
        # if we don't find the quote character immediately, something's wrong, so mark INVALID
        if quoted
            if debug
                println("looking for close quote character")
            end
            # when the close quote and escape characters are identical, a lookahead at the
            # following byte is needed to tell an escaped quote from the closing quote
            same = options.cq == options.e
            # `first` stays true only while examining the byte directly after the value
            first = true
            while true
                vpos = pos
                pos += 1
                incr!(source)
                if same && b == options.e
                    if eof(source, pos, len)
                        code |= EOF
                        if !first
                            code |= INVALID
                        end
                        @goto donedone
                    elseif peekbyte(source, pos) != options.cq
                        # not followed by another quote character, so this was the closing quote
                        if !first
                            code |= INVALID
                        end
                        break
                    end
                    # escaped quote character: skip it and keep scanning
                    code |= ESCAPED_STRING
                    pos += 1
                    incr!(source)
                elseif b == options.e
                    if eof(source, pos, len)
                        code |= INVALID_QUOTED_FIELD | EOF
                        @goto donedone
                    end
                    # skip the escaped byte
                    code |= ESCAPED_STRING
                    pos += 1
                    incr!(source)
                elseif b == options.cq
                    if !first
                        code |= INVALID
                    end
                    if eof(source, pos, len)
                        code |= EOF
                        @goto donedone
                    end
                    break
                end
                if eof(source, pos, len)
                    # ran out of input before finding the close quote character
                    code |= INVALID_QUOTED_FIELD | EOF
                    @goto donedone
                end
                first = false
                b = peekbyte(source, pos)
                if debug
                    println("9) parsed: '$(escape_string(string(Char(b))))'")
                end
            end
            b = peekbyte(source, pos)
            if debug
                println("10) parsed: '$(escape_string(string(Char(b))))'")
            end
            # ignore whitespace after quoted field
            while b == options.wh1 || b == options.wh2
                if debug
                    println("stripping trailing whitespace after close quote character")
                end
                pos += 1
                incr!(source)
                if eof(source, pos, len)
                    code |= EOF
                    @goto donedone
                end
                b = peekbyte(source, pos)
                if debug
                    println("11) parsed: '$(escape_string(string(Char(b))))'")
                end
            end
        end
    end

    if options.delim !== nothing
        delim = options.delim
        # now we check for a delimiter; if we don't find it, keep parsing until we do
        if debug
            println("checking for delimiter: pos=$pos")
        end
        if !ignorerepeated
            # we're checking for a single appearance of a delimiter
            if delim isa UInt8
                if b == delim
                    pos += 1
                    incr!(source)
                    code |= DELIMITED
                    @goto donedone
                end
            else
                # non-byte delimiter: `checkdelim` returns a position past `pos` when it matched
                predelimpos = pos
                pos = checkdelim(source, pos, len, delim)
                if pos > predelimpos
                    # found the delimiter we were looking for
                    code |= DELIMITED
                    @goto donedone
                end
            end
        else
            # keep parsing as long as we keep matching delim
            if delim isa UInt8
                matched = false
                while b == delim
                    matched = true
                    pos += 1
                    incr!(source)
                    if eof(source, pos, len)
                        code |= DELIMITED
                        @goto donedone
                    end
                    b = peekbyte(source, pos)
                    if debug
                        println("12) parsed: '$(escape_string(string(Char(b))))'")
                    end
                end
                if matched
                    code |= DELIMITED
                    @goto donedone
                end
            else
                matched = false
                predelimpos = pos
                pos = checkdelim(source, pos, len, delim)
                while pos > predelimpos
                    matched = true
                    if eof(source, pos, len)
                        code |= DELIMITED
                        @goto donedone
                    end
                    predelimpos = pos
                    pos = checkdelim(source, pos, len, delim)
                end
                if matched
                    code |= DELIMITED
                    @goto donedone
                end
            end
        end
        # didn't find delimiter, but let's check for a newline character
        if b == UInt8('\n')
            pos += 1
            incr!(source)
            code |= NEWLINE | ifelse(eof(source, pos, len), EOF, SUCCESS)
            @goto donedone
        elseif b == UInt8('\r')
            pos += 1
            incr!(source)
            # consume the `\n` of a CRLF pair as part of the same newline
            if !eof(source, pos, len) && peekbyte(source, pos) == UInt8('\n')
                pos += 1
                incr!(source)
            end
            code |= NEWLINE | ifelse(eof(source, pos, len), EOF, SUCCESS)
            @goto donedone
        end
        # didn't find delimiter or newline, so we're invalid, keep parsing until we find delimiter, newline, or len
        # `vpos` is only advanced for unquoted fields (quo == 1); for quoted fields it stays put
        quo = Int(!quoted)
        code |= INVALID_DELIMITER
        while true
            pos += 1
            vpos += quo
            incr!(source)
            if eof(source, pos, len)
                code |= EOF
                @goto donedone
            end
            b = peekbyte(source, pos)
            if debug
                println("13) parsed: '$(escape_string(string(Char(b))))'")
            end
            if !ignorerepeated
                if delim isa UInt8
                    if b == delim
                        pos += 1
                        incr!(source)
                        code |= DELIMITED
                        @goto donedone
                    end
                else
                    predelimpos = pos
                    pos = checkdelim(source, pos, len, delim)
                    if pos > predelimpos
                        # found the delimiter we were looking for
                        code |= DELIMITED
                        @goto donedone
                    end
                end
            else
                if delim isa UInt8
                    matched = false
                    while b == delim
                        matched = true
                        pos += 1
                        incr!(source)
                        if eof(source, pos, len)
                            code |= DELIMITED
                            @goto donedone
                        end
                        b = peekbyte(source, pos)
                        if debug
                            println("12) parsed: '$(escape_string(string(Char(b))))'")
                        end
                    end
                    if matched
                        code |= DELIMITED
                        @goto donedone
                    end
                else
                    # NOTE(review): `matched` is not re-initialized in this branch; it relies on the
                    # `matched = false` assigned in the corresponding pre-loop branch above
                    predelimpos = pos
                    pos = checkdelim(source, pos, len, delim)
                    while pos > predelimpos
                        matched = true
                        if eof(source, pos, len)
                            code |= DELIMITED
                            @goto donedone
                        end
                        predelimpos = pos
                        pos = checkdelim(source, pos, len, delim)
                    end
                    if matched
                        code |= DELIMITED
                        @goto donedone
                    end
                end
            end
            # didn't find delimiter, but let's check for a newline character
            if b == UInt8('\n')
                pos += 1
                incr!(source)
                code |= NEWLINE | ifelse(eof(source, pos, len), EOF, SUCCESS)
                @goto donedone
            elseif b == UInt8('\r')
                pos += 1
                incr!(source)
                if !eof(source, pos, len) && peekbyte(source, pos) == UInt8('\n')
                    pos += 1
                    incr!(source)
                end
                code |= NEWLINE | ifelse(eof(source, pos, len), EOF, SUCCESS)
                @goto donedone
            end
        end
    end

# single exit point: report the value, the code, where the value starts, its length, and the total length consumed
@label donedone
    if debug
        println("finished parsing: $(codes(code))")
    end
    return x, code, Int64(vstartpos), Int64(vpos - vstartpos), Int64(pos - startpos)
end

# Advance `pos` past the delimiter found at `buf[pos]`, if any, and return the new position.
# With `ignorerepeated`, consecutive delimiters are all consumed; otherwise at most one is.
# When no delimiter is present (or `pos > len`), `pos` is returned unchanged.
function checkdelim!(buf, pos, len, options::Options{ignorerepeated}) where {ignorerepeated}
    pos > len && return pos
    delim = options.delim
    @inbounds byte = buf[pos]
    if delim isa UInt8
        if ignorerepeated
            # consume the whole run of delimiter bytes
            while byte == delim
                pos += 1
                pos > len && break
                @inbounds byte = buf[pos]
            end
        elseif byte == delim
            pos += 1
        end
    else
        # non-byte delimiter: `checkdelim` returns a position past `pos` when it matched
        previous = pos
        pos = checkdelim(buf, pos, len, delim)
        if ignorerepeated
            while pos > previous && pos <= len
                previous = pos
                pos = checkdelim(buf, pos, len, delim)
            end
        end
    end
    return pos
end

include("ints.jl")
include("floats.jl")
include("strings.jl")
include("bools.jl")
include("dates.jl")

# Module initializer: for each of the floats.jl globals, resize it via
# `Threads.resize_nthreads!` and then call `MPZ.init!` on every element.
function __init__()
    # floats.jl globals
    for pool in (ONES, NUMS, QUOS, REMS, SCLS)
        Threads.resize_nthreads!(pool)
        foreach(MPZ.init!, pool)
    end
    return
end

end # module