// Tests that compare this crate's PikeVM `find_iter` (run over chunked input)
// against the matches reported by `regex_automata`'s own PikeVM.
use std::ops::RangeBounds;
use proptest::{prop_assert_eq, proptest};
use regex_automata::nfa::thompson::pikevm::PikeVM;
use regex_automata::nfa::thompson::Config;
use regex_automata::util::escape::DebugHaystack;
use regex_automata::util::syntax::Config as SyntaxConfig;
use crate::engines::pikevm::find_iter;
use crate::input::Input;
use crate::test_rope::SingleByteChunks;
use super::Cache;
/// Runs the engine comparison from `test_with_bounds` with an unbounded
/// range, i.e. over the whole of `haystack`.
fn test(needle: &str, haystack: &[u8]) {
    let everything = std::ops::RangeFull;
    test_with_bounds(needle, haystack, everything);
}
/// Checks that this crate's `find_iter` agrees with `regex_automata`'s `PikeVM`.
///
/// `needle` is compiled twice: once with `utf8(true)` and once with
/// `utf8(false)`, applied to both the syntax config and the Thompson config.
/// For each build, the matches `regex_automata` reports on the contiguous
/// `haystack` are compared with the matches `find_iter` reports on the same
/// bytes wrapped in `SingleByteChunks`. Both searches are restricted to
/// `bounds`.
///
/// # Panics
///
/// Panics if `needle` fails to compile or if the two match lists differ.
fn test_with_bounds(needle: &str, haystack: &[u8], bounds: impl RangeBounds<usize> + Clone) {
    for utf8 in [true, false] {
        let regex = PikeVM::builder()
            .syntax(SyntaxConfig::new().utf8(utf8))
            .thompson(Config::new().utf8(utf8))
            .build(needle)
            .unwrap();
        // Each engine gets its own cache; both are built from the same regex.
        let mut cache1 = regex.create_cache();
        let mut cache2 = Cache::new(&regex);

        // Reference result: upstream PikeVM searching the contiguous slice.
        let input = regex_automata::Input::new(haystack).range(bounds.clone());
        let iter1: Vec<_> = regex.find_iter(&mut cache1, input).collect();

        // Result under test: this crate's engine searching the chunked view.
        // NOTE(review): `SingleByteChunks` presumably yields one byte per
        // chunk to stress chunk-boundary handling — confirm in `test_rope`.
        let input = Input::new(SingleByteChunks::new(haystack)).range(bounds.clone());
        let iter2: Vec<_> = find_iter(&regex, &mut cache2, input).collect();

        assert_eq!(iter1, iter2, "matches of {needle} in {:?}", DebugHaystack(haystack));
    }
}
/// Smoke test against a real `ropey::Rope` (only built with the `ropey`
/// feature): searches `test_cases/syntax.rs` case-insensitively for `vec`
/// and expects exactly 68 matches.
#[cfg(feature = "ropey")]
#[test]
fn smoke_test() {
    let text = std::fs::read_to_string("test_cases/syntax.rs").unwrap();
    let regex =
        PikeVM::builder().syntax(SyntaxConfig::new().case_insensitive(true)).build("vec").unwrap();
    let mut cache = Cache::new(&regex);
    let rope = ropey::Rope::from_str(&text);
    // Map every match back to the rope slice it covers so the matched text
    // can be printed below.
    let matches: Vec<_> = find_iter(&regex, &mut cache, Input::new(&rope))
        .map(|range| rope.byte_slice(range.range()))
        .collect();
    println!("found {matches:#?} in syntax.rs");
    assert_eq!(matches.len(), 68);
}
/// The any-character pattern `.` against a haystack holding a single space.
#[test]
fn any() {
    let needle = ".";
    let haystack: &[u8] = b" ";
    test(needle, haystack);
}
/// Patterns containing look-around assertions: line anchors, word boundaries
/// and `\b{end}`, including a long generated haystack with non-ASCII text.
#[test]
fn look_around() {
    // Anchors on short ASCII haystacks, searched without bounds.
    let anchored: [(&str, &[u8]); 3] = [
        ("^bar", b"foobar"),
        ("foo$", b"foobar"),
        (r"(?m)(?:^|a)+", b"a\naaa\n"),
    ];
    for (needle, haystack) in anchored {
        test(needle, haystack);
    }

    // NOTE(review): "š" is 2 bytes in UTF-8, so the bounds `2..3` look out of
    // range for this haystack — confirm the literal was not mangled in transit.
    test_with_bounds(r"\b{end}", "š".as_bytes(), 2..3);

    // Build a long haystack by repeating the same three words with a run of
    // spaces whose length cycles through 0..=30.
    let mut haystack = String::new();
    for i in 0..5 * 4096 {
        let piece = format!("foƶbar foĆ{0}bar foƶ{0}bar", " ".repeat(i % 31));
        haystack.push_str(&piece);
    }
    let needle = r"\bfoƶ\b[ ]*\bbar\b";
    test(needle, haystack.as_bytes());
}
/// Patterns that are able to match the empty string, run against `x`.
#[test]
fn maybe_empty() {
    let haystack: &[u8] = b"x";
    for needle in [r"x*", r"\bx*\b"] {
        test(needle, haystack);
    }
}
proptest! {
#[test]
fn matches(haystack: String, needle: String) {
let Ok(regex) = PikeVM::builder().syntax(SyntaxConfig::new().case_insensitive(true)).build(&needle) else {
return Ok(())
};
let mut cache1 = regex.create_cache();
let mut cache2 = Cache::new(®ex);
let iter1: Vec<_> = regex.find_iter(&mut cache1, &haystack).collect();
let iter2: Vec<_> = find_iter(®ex, &mut cache2, Input::new(SingleByteChunks::new(haystack.as_bytes()))).collect();
prop_assert_eq!(iter1, iter2);
}
#[test]
fn matches_word(haystack: String, needle in r"\\b\PC+\\b") {
let Ok(regex) = PikeVM::builder().syntax(SyntaxConfig::new().case_insensitive(true)).build(&needle) else {
return Ok(())
};
let mut cache1 = regex.create_cache();
let mut cache2 = Cache::new(®ex);
let iter1: Vec<_> = regex.find_iter(&mut cache1, &haystack).collect();
let iter2: Vec<_> = find_iter(®ex, &mut cache2, Input::new(SingleByteChunks::new(haystack.as_bytes()))).collect();
prop_assert_eq!(iter1, iter2);
}
}
|