1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251
|
//! Run tests from Oniguruma's test suite, see `oniguruma/README.md`
use std::collections::HashMap;
use std::panic;
use regex::Regex;
use fancy_regex::Regex as FancyRegex;
/// One test case taken from the Oniguruma C test source.
///
/// Implements `Eq` and `Hash` so that a test can be used as a `HashMap` key.
#[derive(Debug, PartialEq, Eq, Hash)]
struct Test {
    /// The test invocation as it is written in the C file; used in messages.
    source: String,
    /// The regex pattern to compile.
    pattern: String,
    /// The text the pattern is run against.
    text: String,
    /// The outcome the test expects.
    assertion: Assertion,
}
/// The expected outcome of a test.
#[derive(Debug, PartialEq, Eq, Hash)]
enum Assertion {
    /// The pattern must match, and capture group `group` must span `start` to `end`.
    Match {
        /// Index of the capture group to inspect (0 is the whole match).
        group: usize,
        /// Expected start offset of the group.
        start: usize,
        /// Expected end offset of the group.
        end: usize,
    },
    /// The pattern must not match the text at all.
    NoMatch,
}
/// Extract tests from the C source file (or the ignore file).
///
/// A test is a call of the form `x2("pattern", "text", start, end);`,
/// `x3("pattern", "text", start, end, group);` or `n("pattern", "text");`. Both string literals
/// are passed through `unescape`. Indented `//` comment lines directly above a call are joined
/// (without the comment marker) into the comment for that test.
///
/// Returns a vec of tuple of the test data and the comment for the test.
///
/// # Panics
///
/// Panics if a trailing argument cannot be parsed as `usize`, or if an `x2`/`x3` call has fewer
/// numeric arguments than it needs.
fn parse_tests(test_source: &str) -> Vec<(Test, String)> {
    let c_string = r#""((?:\\\\|\\"|[^"])*)""#;
    // Capture groups: 1 = comment lines, 2 = whole call, 3 = macro name, 4 = pattern, 5 = text,
    // 6 = remaining numeric arguments.
    //
    // Group 6 may be empty: `n(...)` calls carry no numeric arguments, and requiring at least
    // one character there would make every such call fail to match and be silently skipped.
    // Comment lines may be indented by any amount, just like the call line itself.
    let re = Regex::new(&format!(
        r"(?m)((?:^[ \t]+//.*\n)*)^\s*((x2|x3|n)\({},\s*{},?([^\)]*)\);)",
        c_string, c_string
    ))
    .unwrap();

    re.captures_iter(test_source)
        .map(|caps| {
            // Drop the indentation and the `// ` marker from every comment line.
            let comment = caps
                .get(1)
                .map_or("", |m| m.as_str())
                .lines()
                .map(|line| {
                    let line = line.trim_start();
                    let line = if line.starts_with("//") {
                        &line[2..]
                    } else {
                        line
                    };
                    if line.starts_with(' ') {
                        &line[1..]
                    } else {
                        line
                    }
                })
                .collect::<Vec<_>>()
                .join("\n")
                .trim()
                .to_string();

            let source = caps[2].to_string();
            let kind = &caps[3];
            let pattern = unescape(&caps[4]);
            let text = unescape(&caps[5]);

            let raw_args = caps[6].trim();
            let args: Vec<usize> = if raw_args.is_empty() {
                Vec::new()
            } else {
                raw_args
                    .split(',')
                    .map(|arg| {
                        arg.trim().parse::<usize>().unwrap_or_else(|_| {
                            panic!(
                                "Invalid numeric argument {:?} in test {}",
                                arg.trim(),
                                source
                            )
                        })
                    })
                    .collect()
            };

            let assertion = {
                // Checked access so that a malformed call names the offending test instead of
                // failing with a bare index-out-of-bounds panic.
                let arg = |index: usize| -> usize {
                    match args.get(index) {
                        Some(&value) => value,
                        None => panic!(
                            "Missing numeric argument {} in test {}",
                            index + 1,
                            source
                        ),
                    }
                };
                match kind {
                    "x2" => Assertion::Match {
                        start: arg(0),
                        end: arg(1),
                        group: 0,
                    },
                    "x3" => Assertion::Match {
                        start: arg(0),
                        end: arg(1),
                        group: arg(2),
                    },
                    "n" => Assertion::NoMatch,
                    _ => {
                        panic!("Unexpected test type {}", kind);
                    }
                }
            };

            (
                Test {
                    source,
                    pattern,
                    text,
                    assertion,
                },
                comment,
            )
        })
        .collect()
}
/// Unescape a string as it appears in C source. This is probably not a perfect implementation, but
/// it's good enough for these tests.
///
/// Supported escapes:
/// - `\\`, `\"`, `\'` and `\?` (the latter is escaped in C to avoid trigraphs)
/// - `\a`, `\b`, `\f`, `\n`, `\r`, `\t`, `\v`
/// - octal escapes with one to three digits, e.g. `\0`, `\17` or `\001`
/// - hex escapes with exactly two digits, e.g. `\x1f`
///
/// Escapes produce raw bytes, so the result may not be valid UTF-8; invalid sequences are replaced
/// with U+FFFD.
///
/// # Panics
///
/// Panics on a trailing backslash, an incomplete or malformed hex escape, an octal escape larger
/// than a byte, or an escape character that is not listed above.
fn unescape(escaped: &str) -> String {
    let mut bytes: Vec<u8> = Vec::with_capacity(escaped.len());
    let mut chars = escaped.chars().peekable();

    while let Some(c) = chars.next() {
        if c != '\\' {
            // Copy ordinary characters through as their UTF-8 bytes without allocating.
            let mut buf = [0u8; 4];
            bytes.extend_from_slice(c.encode_utf8(&mut buf).as_bytes());
            continue;
        }

        let next = chars.next().expect("Expected character after backslash");
        match next {
            '\\' | '"' | '\'' | '?' => bytes.push(next as u8),
            'a' => bytes.push(0x07),
            'b' => bytes.push(0x08),
            'f' => bytes.push(0x0c),
            'n' => bytes.push(b'\n'),
            'r' => bytes.push(b'\r'),
            't' => bytes.push(b'\t'),
            'v' => bytes.push(0x0b),
            '0'..='7' => {
                // octal escape, e.g. \001: the first digit plus up to two more octal digits
                let mut value = next.to_digit(8).expect("Matched an octal digit");
                for _ in 0..2 {
                    match chars.peek().and_then(|digit| digit.to_digit(8)) {
                        Some(digit) => {
                            value = value * 8 + digit;
                            chars.next();
                        }
                        None => break,
                    }
                }
                if value > 0xFF {
                    panic!("Octal escape out of range in {}", escaped);
                }
                bytes.push(value as u8);
            }
            'x' => {
                // hex escape, e.g. \x1f
                let mut hex = String::with_capacity(2);
                hex.push(chars.next().expect("Expected character after \\x"));
                hex.push(chars.next().expect("Expected second character after \\x"));
                let num = u8::from_str_radix(&hex, 16).expect("Error parsing hex number");
                bytes.push(num);
            }
            _ => {
                unimplemented!("Unknown escaped character {} in {}", next, escaped);
            }
        }
    }

    // Some strings in the test are invalid UTF-8. We handle them via ignores.
    String::from_utf8_lossy(&bytes).into_owned()
}
fn run_test(test: &Test) -> Option<String> {
let Test {
pattern,
text,
assertion,
..
} = test;
let compile_result = FancyRegex::new(&pattern);
if compile_result.is_err() {
let error = format!("{:?}", compile_result.unwrap_err());
return Some(format!("Compile failed: {}", error));
}
match *assertion {
Assertion::Match { group, start, end } => {
let result = panic::catch_unwind(|| {
// compile regex again instead of using above, otherwise:
// "may not be safely transferrable across a catch_unwind boundary"
let regex = FancyRegex::new(&pattern).unwrap();
regex.captures(&text).unwrap()
});
if let Ok(captures_result) = result {
if let Some(captures) = captures_result {
let m = captures.get(group).expect("Expected group to exist");
if m.start() != start || m.end() != end {
Some(format!(
"Match found at start {} and end {} (expected {} and {})",
m.start(),
m.end(),
start,
end
))
} else {
None
}
} else {
Some("No match found".to_string())
}
} else {
Some("Panic while matching".to_string())
}
}
Assertion::NoMatch => {
let regex = FancyRegex::new(&pattern).unwrap();
let result = regex.find(&text).unwrap();
if result.is_some() {
Some("Match found".to_string())
} else {
// We expected it not to match and it didn't -> good
None
}
}
}
}
/// Runs every test parsed from `oniguruma/test_utf8.c`.
///
/// Tests listed in `oniguruma/test_utf8_ignore.c` are expected to fail with the failure text given
/// in the comment above them; all other tests are expected to pass.
#[test]
fn oniguruma() {
    let tests: Vec<Test> = parse_tests(include_str!("oniguruma/test_utf8.c"))
        .into_iter()
        .map(|(test, _comment)| test)
        .collect();
    // Maps each ignored test to the failure text it is expected to produce.
    let ignore: HashMap<Test, String> = parse_tests(include_str!("oniguruma/test_utf8_ignore.c"))
        .into_iter()
        .collect();

    let mut ignored = 0;
    let mut success = 0;

    for test in tests {
        let result = run_test(&test);
        match (ignore.get(&test), result) {
            (Some(_), None) => {
                panic!(
                    "Expected ignored test to fail, but it succeeded. Remove it from the ignore file: {}",
                    &test.source
                );
            }
            (Some(expected_failure), Some(failure)) => {
                // rustc before version 1.53 unnecessarily escapes slashes in debug format strings
                // meaning when run on our MSRV, we get different failure text than on the latest
                // stable Rust so here we try removing the backslash when comparing. After our MSRV
                // is bumped up to 1.53 or higher, we can remove this part again.
                let matches_expectation = failure.starts_with(expected_failure)
                    || failure.replace("\\", "").starts_with(expected_failure);
                assert!(
                    matches_expectation,
                    "Expected failure differed for test, change it in the ignore file: {}\nExpected: {}\nActual : {}\n",
                    &test.source, &expected_failure, &failure
                );
                ignored += 1;
            }
            (None, Some(failure)) => {
                // This is a weird way to do the assertions, but the nice thing about it is that we
                // can run the tests without an "ignore" file and instead of failing, print the
                // content for the ignore file. To do that, disable the panic and enable the print:
                // println!(" // {}\n {}\n", failure, test.source);
                panic!("Test {} failed: {}", &test.source, failure);
            }
            (None, None) => {
                // println!("Success: {}", test.source);
                success += 1;
            }
        }
    }

    println!(
        "{} successful Oniguruma tests, {} ignored",
        success, ignored
    );
}
|