1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133
|
use std::error::Error;
use regex_automata::{
dfa::{dense, regex::Regex, Automaton, OverlappingState},
nfa::thompson,
HalfMatch, MatchError, MatchKind, MultiMatch,
};
use crate::util::{BunkPrefilter, SubstringPrefilter};
// Checks that a quit byte is honored by every forward search routine.
//
// The DFA is built with `x` marked as a quit byte, so each forward search
// over "abcxyz" must give up with a `Quit` error at offset 3 (where the `x`
// is) instead of reporting a match.
#[test]
fn quit_fwd() -> Result<(), Box<dyn Error>> {
    let config = dense::Config::new().quit(b'x', true);
    let mut builder = dense::Builder::new();
    builder.configure(config);
    let dfa = builder.build("[[:word:]]+$")?;

    let haystack = b"abcxyz";
    // Every search below is expected to fail in exactly the same way.
    let quit_at_x = || MatchError::Quit { byte: b'x', offset: 3 };

    assert_eq!(dfa.find_earliest_fwd(haystack), Err(quit_at_x()));
    assert_eq!(dfa.find_leftmost_fwd(haystack), Err(quit_at_x()));

    let mut state = OverlappingState::start();
    assert_eq!(
        dfa.find_overlapping_fwd(haystack, &mut state),
        Err(quit_at_x())
    );
    Ok(())
}
// Checks that a quit byte is honored by the reverse search routines.
//
// The DFA is compiled from a reversed NFA with `x` marked as a quit byte, so
// each reverse search over "abcxyz" must give up with a `Quit` error that
// names byte `x` and offset 3.
#[test]
fn quit_rev() -> Result<(), Box<dyn Error>> {
    let dense_config = dense::Config::new().quit(b'x', true);
    let nfa_config = thompson::Config::new().reverse(true);
    let mut builder = dense::Builder::new();
    builder.configure(dense_config).thompson(nfa_config);
    let dfa = builder.build("^[[:word:]]+")?;

    let haystack = b"abcxyz";
    // Both searches below are expected to fail in exactly the same way.
    let quit_at_x = || MatchError::Quit { byte: b'x', offset: 3 };

    assert_eq!(dfa.find_earliest_rev(haystack), Err(quit_at_x()));
    assert_eq!(dfa.find_leftmost_rev(haystack), Err(quit_at_x()));
    Ok(())
}
// Checks that the config builder panics when Unicode word boundaries have
// been heuristically enabled and we then ask for a non-ASCII byte to NOT be
// a quit byte.
#[test]
#[should_panic]
fn quit_panics() {
    let with_word_boundary = dense::Config::new().unicode_word_boundary(true);
    // Un-setting a non-ASCII quit byte is the step that must panic.
    with_word_boundary.quit(b'\xFF', false);
}
// Checks that an overlapping search panics when the regex's reverse DFA was
// not compiled with 'starts_for_each_pattern' enabled.
#[test]
#[should_panic]
fn incorrect_config_overlapping_search_panics() {
    let pattern = r"abca";
    let forward = dense::DFA::new(pattern).unwrap();

    // The reverse DFA deliberately leaves 'starts_for_each_pattern' disabled.
    let reverse_config = dense::Config::new()
        .anchored(true)
        .match_kind(MatchKind::All)
        .starts_for_each_pattern(false);
    let reverse_nfa_config = thompson::Config::new().reverse(true);
    let mut reverse_builder = dense::Builder::new();
    reverse_builder.configure(reverse_config).thompson(reverse_nfa_config);
    let reverse = reverse_builder.build(pattern).unwrap();

    let re = Regex::builder().build_from_dfas(forward, reverse);
    let haystack = "bar abcabcabca abca foo".as_bytes();
    let mut state = OverlappingState::start();
    re.find_overlapping(haystack, &mut state);
}
// This tests an interesting case: even when the Unicode word boundary option
// is left disabled, marking every non-ASCII byte (0x80 through 0xFF) as a
// quit byte causes Unicode word boundaries to be enabled, so building `\b`
// succeeds and the search finds the boundary.
#[test]
fn unicode_word_implicitly_works() -> Result<(), Box<dyn Error>> {
    // Mark each non-ASCII byte as a quit byte, one at a time.
    let config = (0x80_u8..=0xFF)
        .fold(dense::Config::new(), |cfg, byte| cfg.quit(byte, true));
    let mut builder = dense::Builder::new();
    builder.configure(config);
    let dfa = builder.build(r"\b")?;

    let found = dfa.find_leftmost_fwd(b" a");
    assert_eq!(found, Ok(Some(HalfMatch::must(0, 1))));
    Ok(())
}
// Checks that a Regex given a prefilter still reports the correct matches.
#[test]
fn prefilter_works() -> Result<(), Box<dyn Error>> {
    let prefilter = SubstringPrefilter::new("a");
    let re = Regex::new(r"a[0-9]+").unwrap().with_prefilter(prefilter);

    let haystack = b"foo abc foo a1a2a3 foo a123 bar aa456";
    // Record the (start, end) span of every leftmost match, in order.
    let mut spans: Vec<(usize, usize)> = Vec::new();
    for m in re.find_leftmost_iter(haystack) {
        spans.push((m.start(), m.end()));
    }

    let expected = vec![(12, 14), (14, 16), (16, 18), (23, 27), (33, 37)];
    assert_eq!(spans, expected);
    Ok(())
}
// This test confirms that a prefilter is actually consulted during a search.
// It first checks that the regex matches with a working substring prefilter,
// then swaps in a prefilter that reports false negatives and checks that the
// same searches now find nothing.
#[test]
fn prefilter_is_active() -> Result<(), Box<dyn Error>> {
    let re = Regex::new(r"a[0-9]+")
        .unwrap()
        .with_prefilter(SubstringPrefilter::new("a"));
    // Baseline: with a working prefilter, both haystacks match.
    assert_eq!(re.find_leftmost(b"za123"), Some(MultiMatch::must(0, 1, 5)));
    assert_eq!(re.find_leftmost(b"a123"), Some(MultiMatch::must(0, 0, 4)));

    // With the bunk prefilter installed, the matches above must disappear.
    let re = re.with_prefilter(BunkPrefilter::new());
    assert_eq!(re.find_leftmost(b"za123"), None);
    // This checks that the prefilter is used when first starting the search,
    // instead of waiting until at least one transition has occurred.
    assert_eq!(re.find_leftmost(b"a123"), None);
    Ok(())
}
|