|
| 1 | +//! An example of using syntect for testing syntax definitions. |
| 2 | +//! Basically exactly the same as what Sublime Text can do, |
| 3 | +//! but without needing ST installed |
| 4 | +extern crate syntect; |
| 5 | +extern crate walkdir; |
| 6 | +#[macro_use] |
| 7 | +extern crate lazy_static; |
| 8 | +extern crate regex; |
| 9 | +//extern crate onig; |
| 10 | +use syntect::parsing::{SyntaxSet, ParseState, ScopeStack, Scope}; |
| 11 | +use syntect::highlighting::ScopeSelectors; |
| 12 | +use syntect::easy::{ScopeRegionIterator}; |
| 13 | + |
| 14 | +use std::path::Path; |
| 15 | +use std::io::{BufRead, BufReader}; |
| 16 | +use std::fs::File; |
| 17 | +use std::cmp::{min, max}; |
| 18 | +use walkdir::{DirEntry, WalkDir, WalkDirIterator}; |
| 19 | +use std::str::FromStr; |
| 20 | +use regex::Regex; |
| 21 | + |
/// Reasons a syntax test file is rejected before any of its assertions run.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SyntaxTestHeaderError {
    /// The file is empty, or its first line does not match
    /// `SYNTAX_TEST_HEADER_PATTERN`.
    MalformedHeader,
    /// The syntax definition named in the header was not found in the
    /// syntax set the test was run with.
    SyntaxDefinitionNotFound,
}
| 27 | + |
/// Outcome of evaluating every assertion in one syntax test file.
///
/// The counts are numbers of asserted columns (each `^` or `<-` position),
/// not numbers of assertion lines.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SyntaxTestFileResult {
    /// At least one asserted column did not match:
    /// `(failed columns, total asserted columns)`.
    FailedAssertions(usize, usize),
    /// Every asserted column matched; holds the total number of asserted columns.
    Success(usize),
}
| 33 | + |
| 34 | +lazy_static! { |
| 35 | + pub static ref SYNTAX_TEST_HEADER_PATTERN: Regex = Regex::new(r#"(?xm) |
| 36 | + ^(?P<testtoken_start>\s*\S+) |
| 37 | + \s+SYNTAX\sTEST\s+ |
| 38 | + "(?P<syntax_file>[^"]+)" |
| 39 | + \s*(?P<testtoken_end>\S+)?$ |
| 40 | + "#).unwrap(); |
| 41 | + pub static ref SYNTAX_TEST_ASSERTION_PATTERN: Regex = Regex::new(r#"(?xm) |
| 42 | + \s*(?: |
| 43 | + (?P<begin_of_token><-)|(?P<range>\^+) |
| 44 | + )(.+)$"#).unwrap(); |
| 45 | +} |
| 46 | + |
/// One assertion read from a test line: which columns of the line under test
/// it covers and the scope selector those columns are expected to match.
#[derive(Debug)]
struct AssertionRange<'a> {
    /// First column covered by the assertion (inclusive).
    begin_char: usize,
    /// Column just past the last covered column (exclusive).
    end_char: usize,
    /// Selector text that follows the `<-` or `^` marker, borrowed from the line.
    scope_selector_text: &'a str,
    /// `true` when nothing but whitespace surrounds the assertion on its line.
    is_pure_assertion_line: bool,
}
| 54 | + |
| 55 | +#[derive(Debug)] |
| 56 | +struct ScopedText { |
| 57 | + scope: Vec<Scope>, |
| 58 | + char_start: usize, |
| 59 | + text_len: usize, |
| 60 | +} |
| 61 | + |
/// Result of checking an assertion's selector against one stretch of columns.
#[derive(Debug)]
struct RangeTestResult {
    /// First column of the checked stretch (inclusive).
    column_begin: usize,
    /// Column just past the checked stretch (exclusive).
    column_end: usize,
    /// Whether the selector matched the scopes of this stretch.
    success: bool,
}
| 68 | + |
| 69 | +fn get_line_assertion_details<'a>(testtoken_start: &str, testtoken_end: Option<&str>, line: &'a str) -> Option<AssertionRange<'a>> { |
| 70 | + // if the test start token specified in the test file's header is on the line |
| 71 | + if let Some(index) = line.find(testtoken_start) { |
| 72 | + let (before_token_start, token_and_rest_of_line) = line.split_at(index); |
| 73 | + |
| 74 | + if let Some(captures) = SYNTAX_TEST_ASSERTION_PATTERN.captures(&token_and_rest_of_line[testtoken_start.len()..]) { |
| 75 | + let mut sst = captures.get(3).unwrap().as_str(); // get the scope selector text |
| 76 | + let mut only_whitespace_after_token_end = true; |
| 77 | + |
| 78 | + if let Some(token) = testtoken_end { // if there is an end token defined in the test file header |
| 79 | + if let Some(end_token_pos) = sst.find(token) { // and there is an end token in the line |
| 80 | + let (ss, after_token_end) = sst.split_at(end_token_pos); // the scope selector text ends at the end token |
| 81 | + sst = &ss; |
| 82 | + only_whitespace_after_token_end = after_token_end.trim_right().is_empty(); |
| 83 | + } |
| 84 | + } |
| 85 | + return Some(AssertionRange { |
| 86 | + begin_char: index + if captures.get(2).is_some() { testtoken_start.len() + captures.get(2).unwrap().start() } else { 0 }, |
| 87 | + end_char: index + if captures.get(2).is_some() { testtoken_start.len() + captures.get(2).unwrap().end() } else { 1 }, |
| 88 | + scope_selector_text: sst, |
| 89 | + is_pure_assertion_line: before_token_start.trim_left().is_empty() && only_whitespace_after_token_end, // if only whitespace surrounds the test tokens on the line, then it is a pure assertion line |
| 90 | + }); |
| 91 | + } |
| 92 | + } |
| 93 | + None |
| 94 | +} |
| 95 | + |
| 96 | +fn process_assertions(assertion: &AssertionRange, test_against_line_scopes: &Vec<ScopedText>) -> Vec<RangeTestResult> { |
| 97 | + // format the scope selector to include a space at the beginning, because, currently, ScopeSelector expects excludes to begin with " -" |
| 98 | + // and they are sometimes in the syntax test as ^^^-comment, for example |
| 99 | + let selector = ScopeSelectors::from_str(&format!(" {}", &assertion.scope_selector_text)).unwrap(); |
| 100 | + // find the scope at the specified start column, and start matching the selector through the rest of the tokens on the line from there until the end column is reached |
| 101 | + let mut results = Vec::new(); |
| 102 | + for scoped_text in test_against_line_scopes.iter().skip_while(|s|s.char_start + s.text_len <= assertion.begin_char).take_while(|s|s.char_start < assertion.end_char) { |
| 103 | + let match_value = selector.does_match(scoped_text.scope.as_slice()); |
| 104 | + let result = RangeTestResult { |
| 105 | + column_begin: max(scoped_text.char_start, assertion.begin_char), |
| 106 | + column_end: min(scoped_text.char_start + scoped_text.text_len, assertion.end_char), |
| 107 | + success: match_value.is_some() |
| 108 | + }; |
| 109 | + results.push(result); |
| 110 | + } |
| 111 | + // don't ignore assertions after the newline, they should be treated as though they are asserting against the newline |
| 112 | + let last = test_against_line_scopes.last().unwrap(); |
| 113 | + if last.char_start + last.text_len < assertion.end_char { |
| 114 | + let match_value = selector.does_match(last.scope.as_slice()); |
| 115 | + let result = RangeTestResult { |
| 116 | + column_begin: max(last.char_start + last.text_len, assertion.begin_char), |
| 117 | + column_end: assertion.end_char, |
| 118 | + success: match_value.is_some() |
| 119 | + }; |
| 120 | + results.push(result); |
| 121 | + } |
| 122 | + results |
| 123 | +} |
| 124 | + |
| 125 | +/// If `parse_test_lines` is `false` then lines that only contain assertions are not parsed |
| 126 | +fn test_file(ss: &SyntaxSet, path: &Path, parse_test_lines: bool) -> Result<SyntaxTestFileResult, SyntaxTestHeaderError> { |
| 127 | + let f = File::open(path).unwrap(); |
| 128 | + let mut reader = BufReader::new(f); |
| 129 | + let mut line = String::new(); |
| 130 | + |
| 131 | + // read the first line from the file - if we have reached EOF already, it's an invalid file |
| 132 | + if reader.read_line(&mut line).unwrap() == 0 { |
| 133 | + return Err(SyntaxTestHeaderError::MalformedHeader); |
| 134 | + } |
| 135 | + |
| 136 | + line = line.replace("\r", &""); |
| 137 | + |
| 138 | + // parse the syntax test header in the first line of the file |
| 139 | + let header_line = line.clone(); |
| 140 | + let search_result = SYNTAX_TEST_HEADER_PATTERN.captures(&header_line); |
| 141 | + let captures = try!(search_result.ok_or(SyntaxTestHeaderError::MalformedHeader)); |
| 142 | + |
| 143 | + let testtoken_start = captures.name("testtoken_start").unwrap().as_str(); |
| 144 | + let testtoken_end = captures.name("testtoken_end").map_or(None, |c|Some(c.as_str())); |
| 145 | + let syntax_file = captures.name("syntax_file").unwrap().as_str(); |
| 146 | + |
| 147 | + // find the relevant syntax definition to parse the file with - case is important! |
| 148 | + println!("The test file references syntax definition file: {}", syntax_file); |
| 149 | + let syntax = try!(ss.find_syntax_by_path(syntax_file).ok_or(SyntaxTestHeaderError::SyntaxDefinitionNotFound)); |
| 150 | + |
| 151 | + // iterate over the lines of the file, testing them |
| 152 | + let mut state = ParseState::new(syntax); |
| 153 | + let mut stack = ScopeStack::new(); |
| 154 | + |
| 155 | + let mut current_line_number = 1; |
| 156 | + let mut test_against_line_number = 1; |
| 157 | + let mut scopes_on_line_being_tested = Vec::new(); |
| 158 | + let mut previous_non_assertion_line = line.to_string(); |
| 159 | + |
| 160 | + let mut assertion_failures: usize = 0; |
| 161 | + let mut total_assertions: usize = 0; |
| 162 | + |
| 163 | + loop { // over lines of file, starting with the header line |
| 164 | + let mut line_only_has_assertion = false; |
| 165 | + let mut line_has_assertion = false; |
| 166 | + if let Some(assertion) = get_line_assertion_details(testtoken_start, testtoken_end, &line) { |
| 167 | + let result = process_assertions(&assertion, &scopes_on_line_being_tested); |
| 168 | + total_assertions += &assertion.end_char - &assertion.begin_char; |
| 169 | + for failure in result.iter().filter(|r|!r.success) { |
| 170 | + let chars = &previous_non_assertion_line[failure.column_begin..failure.column_end]; |
| 171 | + println!(" Assertion selector {:?} \ |
| 172 | + from line {:?} failed against line {:?}, column range {:?}-{:?} \ |
| 173 | + (with text {:?}) \ |
| 174 | + has scope {:?}", |
| 175 | + assertion.scope_selector_text.trim(), |
| 176 | + current_line_number, test_against_line_number, failure.column_begin, failure.column_end, |
| 177 | + chars, |
| 178 | + scopes_on_line_being_tested.iter().skip_while(|s|s.char_start + s.text_len <= failure.column_begin).next().unwrap_or(scopes_on_line_being_tested.last().unwrap()).scope |
| 179 | + ); |
| 180 | + assertion_failures += failure.column_end - failure.column_begin; |
| 181 | + } |
| 182 | + line_only_has_assertion = assertion.is_pure_assertion_line; |
| 183 | + line_has_assertion = true; |
| 184 | + } |
| 185 | + if !line_only_has_assertion || parse_test_lines { |
| 186 | + if !line_has_assertion { // ST seems to ignore lines that have assertions when calculating which line the assertion tests against |
| 187 | + scopes_on_line_being_tested.clear(); |
| 188 | + test_against_line_number = current_line_number; |
| 189 | + previous_non_assertion_line = line.to_string(); |
| 190 | + } |
| 191 | + let ops = state.parse_line(&line); |
| 192 | + let mut col: usize = 0; |
| 193 | + for (s, op) in ScopeRegionIterator::new(&ops, &line) { |
| 194 | + stack.apply(op); |
| 195 | + if s.is_empty() { // in this case we don't care about blank tokens |
| 196 | + continue; |
| 197 | + } |
| 198 | + if !line_has_assertion { |
| 199 | + // if the line has no assertions on it, remember the scopes on the line so we can test against them later |
| 200 | + let len = s.chars().count(); |
| 201 | + scopes_on_line_being_tested.push( |
| 202 | + ScopedText { |
| 203 | + char_start: col, |
| 204 | + text_len: len, |
| 205 | + scope: stack.as_slice().to_vec() |
| 206 | + } |
| 207 | + ); |
| 208 | + // TODO: warn when there are duplicate adjacent (non-meta?) scopes, as it is almost always undesired |
| 209 | + col += len; |
| 210 | + } |
| 211 | + } |
| 212 | + } |
| 213 | + |
| 214 | + line.clear(); |
| 215 | + current_line_number += 1; |
| 216 | + if reader.read_line(&mut line).unwrap() == 0 { |
| 217 | + break; |
| 218 | + } |
| 219 | + line = line.replace("\r", &""); |
| 220 | + } |
| 221 | + if assertion_failures > 0 { |
| 222 | + Ok(SyntaxTestFileResult::FailedAssertions(assertion_failures, total_assertions)) |
| 223 | + } else { |
| 224 | + Ok(SyntaxTestFileResult::Success(total_assertions)) |
| 225 | + } |
| 226 | +} |
| 227 | + |
| 228 | +fn main() { |
| 229 | + let args: Vec<String> = std::env::args().collect(); |
| 230 | + let tests_path = if args.len() < 2 { |
| 231 | + "." |
| 232 | + } else { |
| 233 | + &args[1] |
| 234 | + }; |
| 235 | + let syntaxes_path = if args.len() == 3 { |
| 236 | + &args[2] |
| 237 | + } else { |
| 238 | + "" |
| 239 | + }; |
| 240 | + |
| 241 | + // load the syntaxes from disk if told to |
| 242 | + // (as opposed to from the binary dumps) |
| 243 | + // this helps to ensure that a recompile isn't needed |
| 244 | + // when using this for syntax development |
| 245 | + let mut ss = if syntaxes_path.is_empty() { |
| 246 | + SyntaxSet::load_defaults_newlines() // note we load the version with newlines |
| 247 | + } else { |
| 248 | + SyntaxSet::new() |
| 249 | + }; |
| 250 | + if !syntaxes_path.is_empty() { |
| 251 | + println!("loading syntax definitions from {}", syntaxes_path); |
| 252 | + ss.load_syntaxes(&syntaxes_path, true).unwrap(); // note that we load the version with newlines |
| 253 | + ss.link_syntaxes(); |
| 254 | + } |
| 255 | + |
| 256 | + let exit_code = recursive_walk(&ss, &tests_path); |
| 257 | + println!("exiting with code {}", exit_code); |
| 258 | + std::process::exit(exit_code); |
| 259 | + |
| 260 | +} |
| 261 | + |
| 262 | + |
| 263 | +fn recursive_walk(ss: &SyntaxSet, path: &str) -> i32 { |
| 264 | + let mut exit_code: i32 = 0; // exit with code 0 by default, if all tests pass |
| 265 | + let walker = WalkDir::new(path).into_iter(); |
| 266 | + for entry in walker.filter_entry(|e|e.file_type().is_dir() || is_a_syntax_test_file(e)) { |
| 267 | + let entry = entry.unwrap(); |
| 268 | + if entry.file_type().is_file() { |
| 269 | + println!("Testing file {}", entry.path().display()); |
| 270 | + let result = test_file(&ss, entry.path(), true); |
| 271 | + println!("{:?}", result); |
| 272 | + if exit_code != 2 { // leave exit code 2 if there was an error |
| 273 | + if let Err(_) = result { // set exit code 2 if there was an error |
| 274 | + exit_code = 2; |
| 275 | + } else if let Ok(ok) = result { |
| 276 | + if let SyntaxTestFileResult::FailedAssertions(_, _) = ok { |
| 277 | + exit_code = 1; // otherwise, if there were failures, exit with code 1 |
| 278 | + } |
| 279 | + } |
| 280 | + } |
| 281 | + } |
| 282 | + } |
| 283 | + exit_code |
| 284 | +} |
| 285 | + |
| 286 | +fn is_a_syntax_test_file(entry: &DirEntry) -> bool { |
| 287 | + entry.file_name() |
| 288 | + .to_str() |
| 289 | + .map(|s| s.starts_with("syntax_test_")) |
| 290 | + .unwrap_or(false) |
| 291 | +} |
0 commit comments