File: api.rs

package info (click to toggle)
rust-regex-automata 0.4.9-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 3,240 kB
  • sloc: sh: 57; makefile: 2
file content (69 lines) | stat: -rw-r--r-- 2,025 bytes parent folder | download | duplicates (41)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
use std::error::Error;

use regex_automata::{
    dfa::{dense, Automaton, OverlappingState},
    nfa::thompson,
    HalfMatch, Input, MatchError,
};

// Checks that a forward search gives up with a "quit" error as soon as it
// sees a configured quit byte. Both the regular and the overlapping forward
// search are exercised against the same haystack, where the quit byte `x`
// sits at offset 3.
#[test]
fn quit_fwd() -> Result<(), Box<dyn Error>> {
    let config = dense::Config::new().quit(b'x', true);
    let dfa = dense::Builder::new().configure(config).build("[[:word:]]+$")?;
    let input = Input::new(b"abcxyz");

    // Regular (leftmost) forward search.
    let found = dfa.try_search_fwd(&input);
    assert_eq!(Err(MatchError::quit(b'x', 3)), found);

    // Overlapping forward search, starting from a fresh state.
    let mut state = OverlappingState::start();
    let overlapping = dfa.try_search_overlapping_fwd(&input, &mut state);
    assert_eq!(overlapping, Err(MatchError::quit(b'x', 3)));

    Ok(())
}

// Checks that a reverse search gives up with a "quit" error as soon as it
// sees a configured quit byte. The DFA is compiled from a reversed Thompson
// NFA so that it can be driven with `try_search_rev`.
#[test]
fn quit_rev() -> Result<(), Box<dyn Error>> {
    let dense_config = dense::Config::new().quit(b'x', true);
    let nfa_config = thompson::Config::new().reverse(true);

    let mut builder = dense::Builder::new();
    builder.configure(dense_config).thompson(nfa_config);
    let dfa = builder.build("^[[:word:]]+")?;

    let haystack = Input::new(b"abcxyz");
    let found = dfa.try_search_rev(&haystack);
    assert_eq!(Err(MatchError::quit(b'x', 3)), found);

    Ok(())
}

// Checks that the configuration panics when Unicode word boundaries are
// heuristically enabled and a non-ASCII byte is then explicitly marked as
// NOT being a quit byte. No DFA is built here; the panic is expected from
// the configuration calls themselves.
#[test]
#[should_panic]
fn quit_panics() {
    let heuristic = dense::Config::new().unicode_word_boundary(true);
    // Un-setting a non-ASCII quit byte contradicts the heuristic above, so
    // this call is the one expected to panic.
    let _ = heuristic.quit(b'\xFF', false);
}

// This tests an interesting case: even when the Unicode word boundary option
// is left disabled, marking every non-ASCII byte as a quit byte causes
// Unicode word boundaries to be enabled anyway, so a pattern using `\b`
// builds and matches on an ASCII haystack.
#[test]
fn unicode_word_implicitly_works() -> Result<(), Box<dyn Error>> {
    // Mark each byte in 0x80..=0xFF (i.e. every non-ASCII byte) as a quit
    // byte, threading the by-value config through the fold.
    let config = (0x80u8..=0xFF)
        .fold(dense::Config::new(), |cfg, byte| cfg.quit(byte, true));
    let dfa = dense::Builder::new().configure(config).build(r"\b")?;

    let got = dfa.try_search_fwd(&Input::new(b" a"));
    assert_eq!(Ok(Some(HalfMatch::must(0, 1))), got);
    Ok(())
}