Skip to content

Commit f5e01f1

Browse files
authored
Merge pull request #44 from forkeith/syntest
added syntest example to run ST syntax tests
2 parents b8c860b + 045dd0c commit f5e01f1

File tree

6 files changed

+351
-8
lines changed

6 files changed

+351
-8
lines changed

Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ bincode = "0.6"
2525
flate2 = "^0.2"
2626
fnv = "^1.0"
2727

28+
[dev-dependencies]
29+
regex = "0.2.1"
30+
2831
[features]
2932
static-onig = ["onig/static-libonig"]
3033
assets = []

assets/default_newlines.packdump

700 Bytes
Binary file not shown.

assets/default_nonewlines.packdump

1.9 KB
Binary file not shown.

examples/syntest.rs

Lines changed: 291 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,291 @@
1+
//! An example of using syntect for testing syntax definitions.
2+
//! Basically exactly the same as what Sublime Text can do,
3+
//! but without needing ST installed
4+
extern crate syntect;
5+
extern crate walkdir;
6+
#[macro_use]
7+
extern crate lazy_static;
8+
extern crate regex;
9+
//extern crate onig;
10+
use syntect::parsing::{SyntaxSet, ParseState, ScopeStack, Scope};
11+
use syntect::highlighting::ScopeSelectors;
12+
use syntect::easy::{ScopeRegionIterator};
13+
14+
use std::path::Path;
15+
use std::io::{BufRead, BufReader};
16+
use std::fs::File;
17+
use std::cmp::{min, max};
18+
use walkdir::{DirEntry, WalkDir, WalkDirIterator};
19+
use std::str::FromStr;
20+
use regex::Regex;
21+
22+
/// Reasons a syntax test file is rejected before any of its assertions are run.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SyntaxTestHeaderError {
    /// The file is empty, or its first line does not match `SYNTAX_TEST_HEADER_PATTERN`.
    MalformedHeader,
    /// The syntax definition named in the header could not be found in the `SyntaxSet`.
    SyntaxDefinitionNotFound,
}
27+
28+
/// Outcome of running every assertion in one syntax test file.
///
/// The counts are measured in asserted columns rather than in assertion lines: an assertion
/// spanning three `^` characters contributes three to the total.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SyntaxTestFileResult {
    /// `(failed, total)`: at least one asserted column did not match its selector.
    FailedAssertions(usize, usize),
    /// `(total)`: every asserted column matched.
    Success(usize),
}
33+
34+
lazy_static! {
    // Matches the header expected on the first line of a syntax test file:
    //   <start token> SYNTAX TEST "<syntax file>" [<end token>]
    // `testtoken_start` is the comment token that introduces assertions (any leading whitespace
    // is captured as part of it), `syntax_file` is the quoted path of the syntax definition and
    // `testtoken_end` is an optional closing comment token.
    // The pattern is compiled in verbose mode (`x`), so whitespace inside it is not significant.
    pub static ref SYNTAX_TEST_HEADER_PATTERN: Regex = Regex::new(r#"(?xm)
^(?P<testtoken_start>\s*\S+)
\s+SYNTAX\sTEST\s+
"(?P<syntax_file>[^"]+)"
\s*(?P<testtoken_end>\S+)?$
"#).unwrap();

    // Matches the assertion part of a line, applied to the text following the start token:
    // either `<-` (group 1, `begin_of_token`) or a run of `^` (group 2, `range`), followed by
    // the scope selector text (group 3).
    // NOTE(review): the pattern is not anchored, so it can match later in the line as well.
    pub static ref SYNTAX_TEST_ASSERTION_PATTERN: Regex = Regex::new(r#"(?xm)
\s*(?:
(?P<begin_of_token><-)|(?P<range>\^+)
)(.+)$"#).unwrap();
}
46+
47+
/// One assertion read from a line of a syntax test file.
#[derive(Debug)]
struct AssertionRange<'a> {
    /// Start column of the asserted range on the line under test (inclusive).
    begin_char: usize,
    /// End column of the asserted range on the line under test (exclusive).
    end_char: usize,
    /// The scope selector text that follows the `<-` or `^` marker, borrowed from the line.
    scope_selector_text: &'a str,
    /// Whether the assertion is the only thing on its line apart from whitespace.
    is_pure_assertion_line: bool,
}
54+
55+
#[derive(Debug)]
56+
struct ScopedText {
57+
scope: Vec<Scope>,
58+
char_start: usize,
59+
text_len: usize,
60+
}
61+
62+
/// Result of matching an assertion's selector against one stretch of columns.
#[derive(Debug)]
struct RangeTestResult {
    /// First column covered by this result (inclusive).
    column_begin: usize,
    /// Column just past the last one covered by this result (exclusive).
    column_end: usize,
    /// Whether the selector matched the scope found at these columns.
    success: bool,
}
68+
69+
fn get_line_assertion_details<'a>(testtoken_start: &str, testtoken_end: Option<&str>, line: &'a str) -> Option<AssertionRange<'a>> {
70+
// if the test start token specified in the test file's header is on the line
71+
if let Some(index) = line.find(testtoken_start) {
72+
let (before_token_start, token_and_rest_of_line) = line.split_at(index);
73+
74+
if let Some(captures) = SYNTAX_TEST_ASSERTION_PATTERN.captures(&token_and_rest_of_line[testtoken_start.len()..]) {
75+
let mut sst = captures.get(3).unwrap().as_str(); // get the scope selector text
76+
let mut only_whitespace_after_token_end = true;
77+
78+
if let Some(token) = testtoken_end { // if there is an end token defined in the test file header
79+
if let Some(end_token_pos) = sst.find(token) { // and there is an end token in the line
80+
let (ss, after_token_end) = sst.split_at(end_token_pos); // the scope selector text ends at the end token
81+
sst = &ss;
82+
only_whitespace_after_token_end = after_token_end.trim_right().is_empty();
83+
}
84+
}
85+
return Some(AssertionRange {
86+
begin_char: index + if captures.get(2).is_some() { testtoken_start.len() + captures.get(2).unwrap().start() } else { 0 },
87+
end_char: index + if captures.get(2).is_some() { testtoken_start.len() + captures.get(2).unwrap().end() } else { 1 },
88+
scope_selector_text: sst,
89+
is_pure_assertion_line: before_token_start.trim_left().is_empty() && only_whitespace_after_token_end, // if only whitespace surrounds the test tokens on the line, then it is a pure assertion line
90+
});
91+
}
92+
}
93+
None
94+
}
95+
96+
fn process_assertions(assertion: &AssertionRange, test_against_line_scopes: &Vec<ScopedText>) -> Vec<RangeTestResult> {
97+
// format the scope selector to include a space at the beginning, because, currently, ScopeSelector expects excludes to begin with " -"
98+
// and they are sometimes in the syntax test as ^^^-comment, for example
99+
let selector = ScopeSelectors::from_str(&format!(" {}", &assertion.scope_selector_text)).unwrap();
100+
// find the scope at the specified start column, and start matching the selector through the rest of the tokens on the line from there until the end column is reached
101+
let mut results = Vec::new();
102+
for scoped_text in test_against_line_scopes.iter().skip_while(|s|s.char_start + s.text_len <= assertion.begin_char).take_while(|s|s.char_start < assertion.end_char) {
103+
let match_value = selector.does_match(scoped_text.scope.as_slice());
104+
let result = RangeTestResult {
105+
column_begin: max(scoped_text.char_start, assertion.begin_char),
106+
column_end: min(scoped_text.char_start + scoped_text.text_len, assertion.end_char),
107+
success: match_value.is_some()
108+
};
109+
results.push(result);
110+
}
111+
// don't ignore assertions after the newline, they should be treated as though they are asserting against the newline
112+
let last = test_against_line_scopes.last().unwrap();
113+
if last.char_start + last.text_len < assertion.end_char {
114+
let match_value = selector.does_match(last.scope.as_slice());
115+
let result = RangeTestResult {
116+
column_begin: max(last.char_start + last.text_len, assertion.begin_char),
117+
column_end: assertion.end_char,
118+
success: match_value.is_some()
119+
};
120+
results.push(result);
121+
}
122+
results
123+
}
124+
125+
/// If `parse_test_lines` is `false` then lines that only contain assertions are not parsed
126+
fn test_file(ss: &SyntaxSet, path: &Path, parse_test_lines: bool) -> Result<SyntaxTestFileResult, SyntaxTestHeaderError> {
127+
let f = File::open(path).unwrap();
128+
let mut reader = BufReader::new(f);
129+
let mut line = String::new();
130+
131+
// read the first line from the file - if we have reached EOF already, it's an invalid file
132+
if reader.read_line(&mut line).unwrap() == 0 {
133+
return Err(SyntaxTestHeaderError::MalformedHeader);
134+
}
135+
136+
line = line.replace("\r", &"");
137+
138+
// parse the syntax test header in the first line of the file
139+
let header_line = line.clone();
140+
let search_result = SYNTAX_TEST_HEADER_PATTERN.captures(&header_line);
141+
let captures = try!(search_result.ok_or(SyntaxTestHeaderError::MalformedHeader));
142+
143+
let testtoken_start = captures.name("testtoken_start").unwrap().as_str();
144+
let testtoken_end = captures.name("testtoken_end").map_or(None, |c|Some(c.as_str()));
145+
let syntax_file = captures.name("syntax_file").unwrap().as_str();
146+
147+
// find the relevant syntax definition to parse the file with - case is important!
148+
println!("The test file references syntax definition file: {}", syntax_file);
149+
let syntax = try!(ss.find_syntax_by_path(syntax_file).ok_or(SyntaxTestHeaderError::SyntaxDefinitionNotFound));
150+
151+
// iterate over the lines of the file, testing them
152+
let mut state = ParseState::new(syntax);
153+
let mut stack = ScopeStack::new();
154+
155+
let mut current_line_number = 1;
156+
let mut test_against_line_number = 1;
157+
let mut scopes_on_line_being_tested = Vec::new();
158+
let mut previous_non_assertion_line = line.to_string();
159+
160+
let mut assertion_failures: usize = 0;
161+
let mut total_assertions: usize = 0;
162+
163+
loop { // over lines of file, starting with the header line
164+
let mut line_only_has_assertion = false;
165+
let mut line_has_assertion = false;
166+
if let Some(assertion) = get_line_assertion_details(testtoken_start, testtoken_end, &line) {
167+
let result = process_assertions(&assertion, &scopes_on_line_being_tested);
168+
total_assertions += &assertion.end_char - &assertion.begin_char;
169+
for failure in result.iter().filter(|r|!r.success) {
170+
let chars = &previous_non_assertion_line[failure.column_begin..failure.column_end];
171+
println!(" Assertion selector {:?} \
172+
from line {:?} failed against line {:?}, column range {:?}-{:?} \
173+
(with text {:?}) \
174+
has scope {:?}",
175+
assertion.scope_selector_text.trim(),
176+
current_line_number, test_against_line_number, failure.column_begin, failure.column_end,
177+
chars,
178+
scopes_on_line_being_tested.iter().skip_while(|s|s.char_start + s.text_len <= failure.column_begin).next().unwrap_or(scopes_on_line_being_tested.last().unwrap()).scope
179+
);
180+
assertion_failures += failure.column_end - failure.column_begin;
181+
}
182+
line_only_has_assertion = assertion.is_pure_assertion_line;
183+
line_has_assertion = true;
184+
}
185+
if !line_only_has_assertion || parse_test_lines {
186+
if !line_has_assertion { // ST seems to ignore lines that have assertions when calculating which line the assertion tests against
187+
scopes_on_line_being_tested.clear();
188+
test_against_line_number = current_line_number;
189+
previous_non_assertion_line = line.to_string();
190+
}
191+
let ops = state.parse_line(&line);
192+
let mut col: usize = 0;
193+
for (s, op) in ScopeRegionIterator::new(&ops, &line) {
194+
stack.apply(op);
195+
if s.is_empty() { // in this case we don't care about blank tokens
196+
continue;
197+
}
198+
if !line_has_assertion {
199+
// if the line has no assertions on it, remember the scopes on the line so we can test against them later
200+
let len = s.chars().count();
201+
scopes_on_line_being_tested.push(
202+
ScopedText {
203+
char_start: col,
204+
text_len: len,
205+
scope: stack.as_slice().to_vec()
206+
}
207+
);
208+
// TODO: warn when there are duplicate adjacent (non-meta?) scopes, as it is almost always undesired
209+
col += len;
210+
}
211+
}
212+
}
213+
214+
line.clear();
215+
current_line_number += 1;
216+
if reader.read_line(&mut line).unwrap() == 0 {
217+
break;
218+
}
219+
line = line.replace("\r", &"");
220+
}
221+
if assertion_failures > 0 {
222+
Ok(SyntaxTestFileResult::FailedAssertions(assertion_failures, total_assertions))
223+
} else {
224+
Ok(SyntaxTestFileResult::Success(total_assertions))
225+
}
226+
}
227+
228+
fn main() {
229+
let args: Vec<String> = std::env::args().collect();
230+
let tests_path = if args.len() < 2 {
231+
"."
232+
} else {
233+
&args[1]
234+
};
235+
let syntaxes_path = if args.len() == 3 {
236+
&args[2]
237+
} else {
238+
""
239+
};
240+
241+
// load the syntaxes from disk if told to
242+
// (as opposed to from the binary dumps)
243+
// this helps to ensure that a recompile isn't needed
244+
// when using this for syntax development
245+
let mut ss = if syntaxes_path.is_empty() {
246+
SyntaxSet::load_defaults_newlines() // note we load the version with newlines
247+
} else {
248+
SyntaxSet::new()
249+
};
250+
if !syntaxes_path.is_empty() {
251+
println!("loading syntax definitions from {}", syntaxes_path);
252+
ss.load_syntaxes(&syntaxes_path, true).unwrap(); // note that we load the version with newlines
253+
ss.link_syntaxes();
254+
}
255+
256+
let exit_code = recursive_walk(&ss, &tests_path);
257+
println!("exiting with code {}", exit_code);
258+
std::process::exit(exit_code);
259+
260+
}
261+
262+
263+
fn recursive_walk(ss: &SyntaxSet, path: &str) -> i32 {
264+
let mut exit_code: i32 = 0; // exit with code 0 by default, if all tests pass
265+
let walker = WalkDir::new(path).into_iter();
266+
for entry in walker.filter_entry(|e|e.file_type().is_dir() || is_a_syntax_test_file(e)) {
267+
let entry = entry.unwrap();
268+
if entry.file_type().is_file() {
269+
println!("Testing file {}", entry.path().display());
270+
let result = test_file(&ss, entry.path(), true);
271+
println!("{:?}", result);
272+
if exit_code != 2 { // leave exit code 2 if there was an error
273+
if let Err(_) = result { // set exit code 2 if there was an error
274+
exit_code = 2;
275+
} else if let Ok(ok) = result {
276+
if let SyntaxTestFileResult::FailedAssertions(_, _) = ok {
277+
exit_code = 1; // otherwise, if there were failures, exit with code 1
278+
}
279+
}
280+
}
281+
}
282+
}
283+
exit_code
284+
}
285+
286+
fn is_a_syntax_test_file(entry: &DirEntry) -> bool {
287+
entry.file_name()
288+
.to_str()
289+
.map(|s| s.starts_with("syntax_test_"))
290+
.unwrap_or(false)
291+
}

src/easy.rs

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -148,21 +148,27 @@ static NOOP_OP: ScopeStackOp = ScopeStackOp::Noop;
148148
impl<'a> Iterator for ScopeRegionIterator<'a> {
149149
type Item = (&'a str, &'a ScopeStackOp);
150150
fn next(&mut self) -> Option<Self::Item> {
151-
let next_str_i = if self.index >= self.ops.len() {
152-
if self.last_str_index >= self.line.len() {
153-
return None;
154-
}
151+
if self.index > self.ops.len() {
152+
return None;
153+
}
154+
155+
// region extends up to next operation (ops[index]) or string end if there is none
156+
// note the next operation may be at, last_str_index, in which case the region is empty
157+
let next_str_i = if self.index == self.ops.len() {
155158
self.line.len()
156159
} else {
157160
self.ops[self.index].0
158161
};
159162
let substr = &self.line[self.last_str_index..next_str_i];
160163
self.last_str_index = next_str_i;
164+
165+
// the first region covers everything before the first op, which may be empty
161166
let op = if self.index == 0 {
162167
&NOOP_OP
163168
} else {
164169
&self.ops[self.index-1].1
165170
};
171+
166172
self.index += 1;
167173
Some((substr, op))
168174
}
@@ -175,7 +181,7 @@ mod tests {
175181
use parsing::{SyntaxSet, ParseState, ScopeStack};
176182
use highlighting::ThemeSet;
177183
use std::str::FromStr;
178-
184+
179185
#[test]
180186
fn can_highlight_lines() {
181187
let ps = SyntaxSet::load_defaults_nonewlines();
@@ -219,4 +225,27 @@ mod tests {
219225
}
220226
assert_eq!(token_count, 5);
221227
}
228+
229+
/// `ScopeRegionIterator` must yield every operation of a line that ends in a newline:
/// one region per operation, plus the leading no-op region.
#[test]
fn can_find_regions_with_trailing_newline() {
    let ss = SyntaxSet::load_defaults_newlines();
    let syntax = ss.find_syntax_by_extension("rb").unwrap();
    let mut state = ParseState::new(syntax);
    let mut stack = ScopeStack::new();
    let lines = ["# hello world\n", "lol=5+2\n"];

    for line in lines.iter() {
        let ops = state.parse_line(&line);
        println!("{:?}", ops);

        // count the regions while applying each operation to the stack
        let mut iterated_count: usize = 0;
        for (_, op) in ScopeRegionIterator::new(&ops, &line) {
            stack.apply(op);
            iterated_count += 1;
            println!("{:?}", op);
        }

        assert_eq!(ops.len(), iterated_count - 1); // -1 because we want to ignore the NOOP
    }
}
222251
}

0 commit comments

Comments
 (0)