1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432
|
//! An example of using syntect for testing syntax definitions.
//! It does essentially the same thing as Sublime Text's syntax tests,
//! but without needing Sublime Text (ST) installed.
// To run tests only for a particular package, while showing the operations, you could use:
// cargo run --example syntest -- --debug testdata/Packages/Makefile/
// To specify that the syntax definitions should be parsed instead of loaded from the dump file,
// you can tell it where to parse them from. The following will execute only one syntax test
// after parsing the sublime-syntax files in the JavaScript folder:
// cargo run --example syntest testdata/Packages/JavaScript/syntax_test_json.json testdata/Packages/JavaScript/
use syntect::easy::ScopeRegionIterator;
use syntect::highlighting::ScopeSelectors;
use syntect::parsing::{ParseState, Scope, ScopeStack, SyntaxSet, SyntaxSetBuilder};
use std::cmp::{max, min};
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::Path;
use std::str::FromStr;
use std::time::Instant;
use getopts::Options;
use once_cell::sync::Lazy;
use regex::Regex;
use walkdir::{DirEntry, WalkDir};
/// Reasons a syntax test file can be rejected before any assertion is run.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SyntaxTestHeaderError {
/// The file is empty or its first line does not match `SYNTAX_TEST_HEADER_PATTERN`.
MalformedHeader,
/// The syntax file named in the header could not be found in the loaded `SyntaxSet`.
SyntaxDefinitionNotFound,
}
/// Outcome of running all assertions in one syntax test file.
///
/// Counts are measured in asserted columns, not in assertion lines: `test_file` adds the
/// width of every assertion range to the total and the width of every failing sub-range
/// to the failure count.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SyntaxTestFileResult {
/// At least one assertion failed: `(failed columns, total asserted columns)`.
FailedAssertions(usize, usize),
/// Every assertion passed; carries the total number of asserted columns.
Success(usize),
}
/// Matches the header line of a syntax test file.
///
/// Named capture groups:
/// * `testtoken_start` - optional leading whitespace followed by the run of non-whitespace
///   characters that opens the line (the comment token that introduces assertions),
/// * `syntax_file` - the double-quoted path that follows the words `SYNTAX TEST`,
/// * `testtoken_end` - an optional trailing run of non-whitespace characters (the closing
///   comment token, when there is one).
///
/// The pattern is compiled lazily on first use with the `x` (verbose) and `m` (multi-line)
/// flags; the `unwrap` cannot fail unless the literal below is edited into an invalid regex.
pub static SYNTAX_TEST_HEADER_PATTERN: Lazy<Regex> = Lazy::new(|| {
Regex::new(
r#"(?xm)
^(?P<testtoken_start>\s*\S+)
\s+SYNTAX\sTEST\s+
"(?P<syntax_file>[^"]+)"
\s*(?P<testtoken_end>\S+)?$
"#,
)
.unwrap()
});
/// Matches the assertion part of a line, i.e. the text that follows the test start token.
///
/// Capture groups (callers in this file access them by index):
/// * 1 / `begin_of_token` - the literal `<-`,
/// * 2 / `range` - a run of one or more `^` characters,
/// * 3 - everything after the marker up to the end of the line (the scope selector text,
///   possibly followed by the end token).
///
/// Exactly one of groups 1 and 2 participates in a match. The pattern is not anchored at the
/// start, so the marker may be found anywhere in the text it is applied to.
pub static SYNTAX_TEST_ASSERTION_PATTERN: Lazy<Regex> = Lazy::new(|| {
Regex::new(
r#"(?xm)
\s*(?:
(?P<begin_of_token><-)|(?P<range>\^+)
)(.*)$"#,
)
.unwrap()
});
/// Command-line switches that control what the test runner prints.
#[derive(Clone, Copy)]
struct OutputOptions {
/// Print the elapsed time in milliseconds for each test file.
time: bool,
/// Print the scope stack and the parse operations for each parsed line.
debug: bool,
/// Suppress per-file and per-assertion output; print only a `FAILED` line for failing files.
summary: bool,
}
/// A single assertion extracted from one line of a syntax test file.
#[derive(Debug)]
struct AssertionRange<'a> {
/// Start column (inclusive) of the range under test; compared against the
/// character-based columns stored in `ScopedText`.
begin_char: usize,
/// End column (exclusive) of the range under test.
end_char: usize,
/// The scope selector text that follows the `<-` / `^` marker, borrowed from the line.
scope_selector_text: &'a str,
/// `true` when only whitespace surrounds the test tokens, i.e. the line carries nothing
/// but the assertion.
is_pure_assertion_line: bool,
}
/// A run of text on the line being tested together with the scopes that apply to it.
#[derive(Debug)]
struct ScopedText {
/// Snapshot of the scope stack in effect for this run of text.
scope: Vec<Scope>,
/// Column, counted in characters, at which the run starts.
char_start: usize,
/// Length of the run, counted in characters.
text_len: usize,
}
/// Result of matching an assertion's selector against one scoped run of text.
#[derive(Debug)]
struct RangeTestResult {
/// First column covered by this result (the run's start clamped to the assertion's start).
column_begin: usize,
/// Column just past the last one covered (the run's end clamped to the assertion's end).
column_end: usize,
/// Whether the selector matched the scopes of the run.
success: bool,
}
/// Extracts the assertion described by `line`, if the line contains one.
///
/// A line contains an assertion when the test start token from the file header appears on
/// it and the text after that token matches `SYNTAX_TEST_ASSERTION_PATTERN` (a `<-` marker
/// or a run of `^` characters, followed by the scope selector).
///
/// * `testtoken_start` - the token that introduces assertions (from the header line).
/// * `testtoken_end` - the optional closing token (from the header line); when it occurs in
///   the selector text, the selector is cut off just before it.
/// * `line` - the line to inspect.
///
/// Returns `None` when the line carries no assertion. Otherwise the returned range is
/// expressed in characters (not bytes), so it can be compared directly with the
/// character-based columns recorded for the line under test:
/// * for `<-` the range is the single column at which the start token begins,
/// * for `^^^` the range spans exactly the columns occupied by the carets.
fn get_line_assertion_details<'a>(
    testtoken_start: &str,
    testtoken_end: Option<&str>,
    line: &'a str,
) -> Option<AssertionRange<'a>> {
    // the test start token specified in the test file's header must be on the line
    let index = line.find(testtoken_start)?;
    let (before_token_start, token_and_rest_of_line) = line.split_at(index);
    let after_token_start = &token_and_rest_of_line[testtoken_start.len()..];
    let captures = SYNTAX_TEST_ASSERTION_PATTERN.captures(after_token_start)?;

    // group 3 is the unconditional `(.*)` tail, so it is always present on a match
    let mut sst = captures.get(3).unwrap().as_str(); // the scope selector text
    let mut only_whitespace_after_token_end = true;
    if let Some(token) = testtoken_end {
        // if there is an end token defined in the test file header
        if let Some(end_token_pos) = sst.find(token) {
            // and there is an end token in the line
            let (ss, token_and_after) = sst.split_at(end_token_pos); // the scope selector text ends at the end token
            sst = ss;
            // skip over the end token itself: only what FOLLOWS it decides purity
            only_whitespace_after_token_end =
                token_and_after[token.len()..].trim_end().is_empty();
        }
    }

    // Columns elsewhere in this file are counted in characters, so translate the byte
    // offsets produced by `find` and the regex into character counts.
    let chars_before = |byte_offset: usize| line[..byte_offset].chars().count();
    let (begin_char, end_char) = match captures.get(2) {
        // `^^^`: the asserted columns are exactly the ones under the carets
        Some(range) => {
            let offset = index + testtoken_start.len();
            (
                chars_before(offset + range.start()),
                chars_before(offset + range.end()),
            )
        }
        // `<-`: the asserted column is the one where the start token begins
        None => {
            let begin = chars_before(index);
            (begin, begin + 1)
        }
    };

    Some(AssertionRange {
        begin_char,
        end_char,
        scope_selector_text: sst,
        // if only whitespace surrounds the test tokens on the line, then it is a pure assertion line
        is_pure_assertion_line: before_token_start.trim_start().is_empty()
            && only_whitespace_after_token_end,
    })
}
/// Checks one assertion against the scoped runs of text of the line it refers to.
///
/// Every run that overlaps `[assertion.begin_char, assertion.end_char)` yields one
/// `RangeTestResult`, clamped to the asserted range. If the assertion extends past the end
/// of the line, one more result is produced for the overhang, tested against the scopes of
/// the last run (i.e. the assertion is treated as targeting the newline).
///
/// # Panics
///
/// Panics if the selector text cannot be parsed or if `test_against_line_scopes` is empty.
fn process_assertions(
    assertion: &AssertionRange<'_>,
    test_against_line_scopes: &[ScopedText],
) -> Vec<RangeTestResult> {
    // ScopeSelector currently expects excludes to begin with " -", but syntax tests sometimes
    // write them as ^^^-comment, so prepend a space before parsing the selector
    let selector_text = format!(" {}", &assertion.scope_selector_text);
    let selector = ScopeSelectors::from_str(&selector_text).unwrap();

    // skip the runs that end before the asserted range, then test each run until the end
    // column of the assertion is reached
    let mut outcomes: Vec<RangeTestResult> = test_against_line_scopes
        .iter()
        .skip_while(|run| run.char_start + run.text_len <= assertion.begin_char)
        .take_while(|run| run.char_start < assertion.end_char)
        .map(|run| {
            let run_end = run.char_start + run.text_len;
            RangeTestResult {
                column_begin: max(run.char_start, assertion.begin_char),
                column_end: min(run_end, assertion.end_char),
                success: selector.does_match(run.scope.as_slice()).is_some(),
            }
        })
        .collect();

    // assertions reaching beyond the newline are not ignored: they are tested as though
    // they were asserting against the newline itself
    let final_run = test_against_line_scopes.last().unwrap();
    let line_end = final_run.char_start + final_run.text_len;
    if line_end < assertion.end_char {
        outcomes.push(RangeTestResult {
            column_begin: max(line_end, assertion.begin_char),
            column_end: assertion.end_char,
            success: selector.does_match(final_run.scope.as_slice()).is_some(),
        });
    }

    outcomes
}
/// Runs every assertion found in the syntax test file at `path` and reports the outcome.
///
/// The first line of the file must match `SYNTAX_TEST_HEADER_PATTERN`; it supplies the
/// assertion start token, the optional end token and the path of the syntax definition,
/// which is looked up in `ss`. The file is then parsed line by line. The scopes of the most
/// recent line without an assertion are remembered, and each assertion line is checked
/// against them.
///
/// If `parse_test_lines` is `false` then lines that only contain assertions are not parsed
///
/// `out_opts.summary` suppresses the verbose output and prints only a `FAILED` line for a
/// failing file; `out_opts.debug` prints the scope stack and parse operations of each parsed
/// line that is not a pure assertion line.
///
/// # Errors
///
/// * `SyntaxTestHeaderError::MalformedHeader` - the file is empty or its first line does not
///   match the header pattern.
/// * `SyntaxTestHeaderError::SyntaxDefinitionNotFound` - `ss` has no syntax for the path
///   named in the header.
///
/// # Panics
///
/// Panics if the file cannot be opened or read, or if parsing a line or applying a scope
/// stack operation fails (all of these are `unwrap`ped).
fn test_file(
ss: &SyntaxSet,
path: &Path,
parse_test_lines: bool,
out_opts: OutputOptions,
) -> Result<SyntaxTestFileResult, SyntaxTestHeaderError> {
use syntect::util::debug_print_ops;
let f = File::open(path).unwrap();
let mut reader = BufReader::new(f);
let mut line = String::new();
// read the first line from the file - if we have reached EOF already, it's an invalid file
if reader.read_line(&mut line).unwrap() == 0 {
return Err(SyntaxTestHeaderError::MalformedHeader);
}
// drop carriage returns so files with CRLF line endings are handled like LF files
line = line.replace('\r', "");
// parse the syntax test header in the first line of the file
// (a separate copy is kept because the captures borrow from it while `line` is reused below)
let header_line = line.clone();
let search_result = SYNTAX_TEST_HEADER_PATTERN.captures(&header_line);
let captures = search_result.ok_or(SyntaxTestHeaderError::MalformedHeader)?;
let testtoken_start = captures.name("testtoken_start").unwrap().as_str();
let testtoken_end = captures.name("testtoken_end").map(|c| c.as_str());
let syntax_file = captures.name("syntax_file").unwrap().as_str();
// find the relevant syntax definition to parse the file with - case is important!
if !out_opts.summary {
println!(
"The test file references syntax definition file: {}",
syntax_file
);
}
let syntax = ss
.find_syntax_by_path(syntax_file)
.ok_or(SyntaxTestHeaderError::SyntaxDefinitionNotFound)?;
// iterate over the lines of the file, testing them
let mut state = ParseState::new(syntax);
let mut stack = ScopeStack::new();
// 1-based number of the line currently held in `line`
let mut current_line_number = 1;
// number of the most recent line without an assertion, i.e. the line assertions refer to
let mut test_against_line_number = 1;
// scoped runs of text recorded for that line
let mut scopes_on_line_being_tested = Vec::new();
// text of that line, kept only to quote the offending text in failure messages
let mut previous_non_assertion_line = line.to_string();
// both counters are measured in asserted columns rather than in assertion lines
let mut assertion_failures: usize = 0;
let mut total_assertions: usize = 0;
loop {
// over lines of file, starting with the header line
let mut line_only_has_assertion = false;
let mut line_has_assertion = false;
if let Some(assertion) = get_line_assertion_details(testtoken_start, testtoken_end, &line) {
let result = process_assertions(&assertion, &scopes_on_line_being_tested);
total_assertions += assertion.end_char - assertion.begin_char;
for failure in result.iter().filter(|r| !r.success) {
let length = failure.column_end - failure.column_begin;
// the text of the tested line that the failing columns cover
let text: String = previous_non_assertion_line
.chars()
.skip(failure.column_begin)
.take(length)
.collect();
if !out_opts.summary {
println!(
" Assertion selector {:?} \
from line {:?} failed against line {:?}, column range {:?}-{:?} \
(with text {:?}) \
has scope {:?}",
assertion.scope_selector_text.trim(),
current_line_number,
test_against_line_number,
failure.column_begin,
failure.column_end,
text,
scopes_on_line_being_tested
.iter()
.find(|s| s.char_start + s.text_len > failure.column_begin)
.unwrap_or_else(|| scopes_on_line_being_tested.last().unwrap())
.scope
);
}
assertion_failures += failure.column_end - failure.column_begin;
}
line_only_has_assertion = assertion.is_pure_assertion_line;
line_has_assertion = true;
}
// pure assertion lines are parsed only when the caller asked for it
if !line_only_has_assertion || parse_test_lines {
if !line_has_assertion {
// ST seems to ignore lines that have assertions when calculating which line the assertion tests against
scopes_on_line_being_tested.clear();
test_against_line_number = current_line_number;
previous_non_assertion_line = line.to_string();
}
if out_opts.debug && !line_only_has_assertion {
println!(
"-- debugging line {} -- scope stack: {:?}",
current_line_number, stack
);
}
let ops = state.parse_line(&line, ss).unwrap();
if out_opts.debug && !line_only_has_assertion {
if ops.is_empty() && !line.is_empty() {
println!("no operations for this line...");
} else {
debug_print_ops(&line, &ops);
}
}
// running column, in characters, of the next run of text on this line
let mut col: usize = 0;
for (s, op) in ScopeRegionIterator::new(&ops, &line) {
// the stack is updated for every line so that parser state carries across lines
stack.apply(op).unwrap();
if s.is_empty() {
// in this case we don't care about blank tokens
continue;
}
if !line_has_assertion {
// if the line has no assertions on it, remember the scopes on the line so we can test against them later
let len = s.chars().count();
scopes_on_line_being_tested.push(ScopedText {
char_start: col,
text_len: len,
scope: stack.as_slice().to_vec(),
});
// TODO: warn when there are duplicate adjacent (non-meta?) scopes, as it is almost always undesired
col += len;
}
}
}
// advance to the next line; a read of zero bytes means end of file
line.clear();
current_line_number += 1;
if reader.read_line(&mut line).unwrap() == 0 {
break;
}
line = line.replace('\r', "");
}
let res = if assertion_failures > 0 {
Ok(SyntaxTestFileResult::FailedAssertions(
assertion_failures,
total_assertions,
))
} else {
Ok(SyntaxTestFileResult::Success(total_assertions))
};
if out_opts.summary {
if let Ok(SyntaxTestFileResult::FailedAssertions(failures, _)) = res {
// Don't print total assertion count so that diffs don't pick up new succeeding tests
println!("FAILED {}: {}", path.display(), failures);
}
} else {
println!("{:?}", res);
}
res
}
/// Entry point: parses the command line, loads the syntax definitions and runs the tests.
///
/// Usage: `syntest [-d|--debug] [-t|--time] [-s|--summary] [TESTS_PATH [SYNTAXES_PATH]]`
///
/// * `TESTS_PATH` - file or folder to search for `syntax_test_*` files (default `.`).
/// * `SYNTAXES_PATH` - folder to parse syntax definitions from; when omitted, the default
///   definitions bundled in the binary dump are used.
///
/// The positional arguments are taken from the parser's list of free arguments, so flags may
/// appear before, between or after them.
///
/// The process exits with the code returned by `recursive_walk`.
///
/// # Panics
///
/// Panics if the command line cannot be parsed or the syntax folder cannot be loaded.
fn main() {
    let args: Vec<String> = std::env::args().collect();
    let mut opts = Options::new();
    opts.optflag("d", "debug", "Show parsing results for each test line");
    opts.optflag(
        "t",
        "time",
        "Time execution as a more broad-ranging benchmark",
    );
    opts.optflag("s", "summary", "Print only summary of test failures");

    let matches = match opts.parse(&args[1..]) {
        Ok(m) => m,
        Err(f) => panic!("{}", f),
    };

    // Use the free (non-flag) arguments rather than raw `args` positions: indexing `args`
    // directly picks up a flag such as `--debug` as the path whenever a flag comes first.
    let tests_path = matches.free.first().map_or(".", |p| p.as_str());
    let syntaxes_path = matches.free.get(1).map_or("", |p| p.as_str());

    // load the syntaxes from disk if told to
    // (as opposed to from the binary dumps)
    // this helps to ensure that a recompile isn't needed
    // when using this for syntax development
    let ss = if syntaxes_path.is_empty() {
        SyntaxSet::load_defaults_newlines() // note we load the version with newlines
    } else {
        println!("loading syntax definitions from {}", syntaxes_path);
        let mut builder = SyntaxSetBuilder::new();
        builder.add_from_folder(syntaxes_path, true).unwrap(); // note that we load the version with newlines
        builder.build()
    };

    let out_opts = OutputOptions {
        debug: matches.opt_present("debug"),
        time: matches.opt_present("time"),
        summary: matches.opt_present("summary"),
    };

    let exit_code = recursive_walk(&ss, tests_path, out_opts);
    println!("exiting with code {}", exit_code);
    std::process::exit(exit_code);
}
fn recursive_walk(ss: &SyntaxSet, path: &str, out_opts: OutputOptions) -> i32 {
let mut exit_code: i32 = 0; // exit with code 0 by default, if all tests pass
let walker = WalkDir::new(path).into_iter();
// accumulate and sort for consistency of diffs across machines
let mut files = Vec::new();
for entry in walker.filter_entry(|e| e.file_type().is_dir() || is_a_syntax_test_file(e)) {
let entry = entry.unwrap();
if entry.file_type().is_file() {
files.push(entry.path().to_owned());
}
}
files.sort();
for path in &files {
if !out_opts.summary {
println!("Testing file {}", path.display());
}
let start = Instant::now();
let result = test_file(ss, path, true, out_opts);
let elapsed = start.elapsed();
if out_opts.time {
let ms = (elapsed.as_secs() * 1_000) + elapsed.subsec_millis() as u64;
println!("{} ms for file {}", ms, path.display());
}
if exit_code != 2 {
// leave exit code 2 if there was an error
if result.is_err() {
// set exit code 2 if there was an error
exit_code = 2;
} else if let Ok(SyntaxTestFileResult::FailedAssertions(_, _)) = result {
exit_code = 1; // otherwise, if there were failures, exit with code 1
}
}
}
exit_code
}
/// Returns `true` when the entry's file name starts with `syntax_test_`.
///
/// File names that are not valid Unicode never qualify.
fn is_a_syntax_test_file(entry: &DirEntry) -> bool {
    match entry.file_name().to_str() {
        Some(name) => name.starts_with("syntax_test_"),
        None => false,
    }
}
|