1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150
|
use {
anyhow::Result,
regex_lite::{Regex, RegexBuilder},
regex_test::{
CompiledRegex, Match, RegexTest, Span, TestResult, TestRunner,
},
};
/// Runs the shared regex test suite against `regex_lite::Regex`.
///
/// Every test for which `compiles()` is true is expanded under the
/// additional names `is_match`, `find` and `captures`; `run_test` dispatches
/// on that name. Tests matching an entry in `super::BLACKLIST` are excluded.
#[test]
fn default() -> Result<()> {
    let mut runner = TestRunner::new()?;
    runner.expand(&["is_match", "find", "captures"], |test| test.compiles());
    runner.blacklist_iter(super::BLACKLIST);
    // Load the suite only after the runner is configured, keeping the same
    // call order as a single builder chain would have.
    let suite = crate::suite()?;
    runner.test_iter(suite.iter(), compiler);
    runner.assert();
    Ok(())
}
/// Executes a single test against an already compiled regex.
///
/// The haystack must be valid UTF-8 because the regex searches `&str`; if it
/// is not, the test fails with a descriptive message. The kind of search
/// performed is selected by the test's additional name (`is_match`, `find`
/// or `captures`); any other name yields a failure.
fn run_test(re: &Regex, test: &RegexTest) -> TestResult {
    let haystack = match std::str::from_utf8(test.haystack()) {
        Err(err) => {
            let msg = format!("haystack is not valid UTF-8: {}", err);
            return TestResult::fail(&msg);
        }
        Ok(text) => text,
    };
    // A test without an explicit limit reports every match.
    let limit = test.match_limit().unwrap_or(usize::MAX);
    match test.additional_name() {
        "is_match" => TestResult::matched(re.is_match(haystack)),
        "find" => {
            let found = re.find_iter(haystack).take(limit).map(|m| {
                let span = Span { start: m.start(), end: m.end() };
                Match { id: 0, span }
            });
            TestResult::matches(found)
        }
        "captures" => TestResult::captures(
            re.captures_iter(haystack)
                .take(limit)
                .map(|caps| testify_captures(&caps)),
        ),
        other => {
            TestResult::fail(&format!("unrecognized test name: {}", other))
        }
    }
}
/// Builds the `CompiledRegex` used to run the given test.
///
/// When `skip_or_get_pattern` rejects the test, a `CompiledRegex` that skips
/// it is returned. Otherwise the test's single pattern is compiled with
/// `RegexBuilder`, honoring the test's case insensitivity setting, and the
/// result is wrapped in a closure that delegates to `run_test`.
///
/// An error is returned if the pattern fails to compile.
fn compiler(
    test: &RegexTest,
    _patterns: &[String],
) -> anyhow::Result<CompiledRegex> {
    match skip_or_get_pattern(test) {
        None => Ok(CompiledRegex::skip()),
        Some(pattern) => {
            let mut builder = RegexBuilder::new(pattern);
            builder.case_insensitive(test.case_insensitive());
            let re = builder.build()?;
            Ok(CompiledRegex::compiled(move |t| run_test(&re, t)))
        }
    }
}
/// Decides whether a test can be run here.
///
/// Returns `None` when the test should be skipped, and otherwise the one and
/// only pattern of the test.
fn skip_or_get_pattern(test: &RegexTest) -> Option<&str> {
    // Only a single-pattern Regex is exercised here, so anything with zero
    // or several patterns is skipped.
    let pattern: &str = match test.regexes() {
        [only] => only.as_str(),
        _ => return None,
    };
    // Tests whose name contains 'regex-lite/' were written specifically for
    // this crate. They ALWAYS run, so a failure there demands attention.
    if test.full_name().contains("regex-lite/") {
        return Some(pattern);
    }

    // True if the pattern contains at least one of the given escapes.
    let mentions = |escapes: &[&str]| {
        escapes.iter().any(|esc| pattern.contains(*esc))
    };

    // A \p (or \P) is almost certainly a Unicode class, which isn't
    // supported. This likely skips a bit more than intended, but very few
    // tests should contain a \p that isn't a Unicode class.
    if mentions(&[r"\p", r"\P"]) {
        return None;
    }
    // Perl classes and word boundaries are tolerated only when the haystack
    // is entirely ASCII.
    let ascii_haystack = test.haystack().is_ascii();
    if !ascii_haystack
        && mentions(&[
            r"\d", r"\D", r"\s", r"\S", r"\w", r"\W", r"\b", r"\B",
        ])
    {
        return None;
    }
    // is_match, find_iter and captures_iter are all leftmost searches.
    match test.search_kind() {
        regex_test::SearchKind::Leftmost => {}
        _ => return None,
    }
    // The single-pattern top-level API always uses leftmost-first semantics.
    match test.match_kind() {
        regex_test::MatchKind::LeftmostFirst => {}
        _ => return None,
    }
    // Searches are always unanchored at the top level. An anchored test is
    // still usable, though, when it asks for exactly one match.
    if test.anchored() && !matches!(test.match_limit(), Some(1)) {
        return None;
    }
    // Explicit search bounds aren't supported. (The 'find_at' family of
    // APIs could probably be used to support them.)
    let bounds = test.bounds();
    if bounds.start != 0 || bounds.end != test.haystack().len() {
        return None;
    }
    // UTF-8 mode can't be disabled: only &str can be searched, and that is
    // always valid UTF-8.
    if !test.utf8() {
        return None;
    }
    // Case insensitive matching in regex-lite isn't Unicode-aware, so both
    // the pattern and the haystack must be ASCII for such tests.
    if test.case_insensitive() && !(pattern.is_ascii() && ascii_haystack) {
        return None;
    }
    Some(pattern)
}
/// Convert `Captures` into the test suite's capture values.
fn testify_captures(caps: ®ex_lite::Captures<'_>) -> regex_test::Captures {
let spans = caps.iter().map(|group| {
group.map(|m| regex_test::Span { start: m.start(), end: m.end() })
});
// This unwrap is OK because we assume our 'caps' represents a match, and
// a match always gives a non-zero number of groups with the first group
// being non-None.
regex_test::Captures::new(0, spans).unwrap()
}
|