Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions crates/oxc_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,13 @@ impl<'a> Lexer<'a> {
Self::new(allocator, source_text, source_type, unique)
}

/// Get the errors held by this lexer.
///
/// Returns a borrowed slice over the lexer's `errors` field.
/// Only used in benchmarks, so it is only compiled when the `benchmarking` feature is enabled.
#[cfg(feature = "benchmarking")]
pub fn errors(&self) -> &[OxcDiagnostic] {
&self.errors
}

/// Remaining string from `Source`
pub fn remaining(&self) -> &'a str {
self.source.remaining()
Expand Down
4 changes: 3 additions & 1 deletion tasks/benchmark/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ bench = false
# with only the crates it needs, to speed up the builds
[dependencies]
oxc_allocator = { workspace = true, optional = true }
oxc_ast = { workspace = true, optional = true }
oxc_codegen = { workspace = true, optional = true }
oxc_isolated_declarations = { workspace = true, optional = true }
oxc_linter = { workspace = true, optional = true }
Expand All @@ -86,6 +87,7 @@ serde_json = { workspace = true, optional = true }
[features]
default = [
"dep:oxc_allocator",
"dep:oxc_ast",
"dep:oxc_codegen",
"dep:oxc_isolated_declarations",
"dep:oxc_linter",
Expand All @@ -103,7 +105,7 @@ codspeed_napi = ["criterion2/codspeed", "dep:serde", "dep:serde_json"]

# Features for running each benchmark separately with minimum dependencies that benchmark needs.
# e.g. `cargo build --release -p oxc_benchmark --bench parser --no-default-features --features parser`
lexer = ["dep:oxc_allocator", "dep:oxc_parser", "dep:oxc_span", "dep:oxc_tasks_common"]
lexer = ["dep:oxc_allocator", "dep:oxc_ast", "dep:oxc_parser", "dep:oxc_span", "dep:oxc_tasks_common"]
parser = ["dep:oxc_allocator", "dep:oxc_parser", "dep:oxc_span", "dep:oxc_tasks_common"]
transformer = [
"dep:oxc_allocator",
Expand Down
122 changes: 114 additions & 8 deletions tasks/benchmark/benches/lexer.rs
Original file line number Diff line number Diff line change
@@ -1,24 +1,37 @@
#![allow(clippy::disallowed_methods)]
use oxc_allocator::Allocator;
use oxc_ast::{ast::*, Visit};
use oxc_benchmark::{criterion_group, criterion_main, BenchmarkId, Criterion};
use oxc_parser::lexer::{Kind, Lexer};
use oxc_parser::{
lexer::{Kind, Lexer},
Parser,
};
use oxc_span::SourceType;
use oxc_tasks_common::{TestFile, TestFiles};

fn bench_lexer(criterion: &mut Criterion) {
let mut group = criterion.benchmark_group("lexer");

// Lexer lacks awareness of JS grammar, so it gets confused by a few things without the parser
// driving it, notably escapes in regexps and template strings.
// So simplify the input for it, by removing backslashes and converting template strings to
// normal string literals.
// driving it. So simplify the input for it, by replacing these syntaxes with plain strings.
// This ensures lexing completes without generating any errors, which is more realistic.
//
// It's unfortunate that this benchmark doesn't exercise the code paths for these syntaxes,
// but this is the closest we can get to a realistic benchmark of lexer in isolation.
let mut allocator = Allocator::default();
let files = TestFiles::complicated()
.files()
.iter()
.map(|file| TestFile {
url: file.url.clone(),
file_name: file.file_name.clone(),
source_text: file.source_text.replace('\\', " ").replace('`', "'"),
.map(|file| {
let source_type = SourceType::from_path(&file.file_name).unwrap();

let mut cleaner = SourceCleaner::new(&file.source_text);
cleaner.clean(source_type, &allocator);
let source_text = cleaner.source_text;

allocator.reset();

TestFile { url: file.url.clone(), file_name: file.file_name.clone(), source_text }
})
.collect::<Vec<_>>();

Expand All @@ -43,3 +56,96 @@ fn bench_lexer(criterion: &mut Criterion) {

criterion_group!(lexer, bench_lexer);
criterion_main!(lexer);

/// Cleaner of source text.
///
/// Its purpose is to allow the lexer to complete without producing any errors.
/// The sources Oxc is asked to parse will usually not produce lexer errors, and generating
/// diagnostics is fairly expensive, so a benchmark which hits error paths would be unrealistic.
///
/// Certain syntax parses without error, but the lexer alone does not have the context to understand
/// that it is fine. Notably this includes syntax where the lexer only consumes the first character
/// and the parser would then call back into the lexer to complete the job.
///
/// So replace these syntaxes with string literals, so that the lexer can complete without error:
/// * `RegExpLiteral`
/// * `TemplateLiteral`
/// * `JSXText`
struct SourceCleaner {
/// Source text being cleaned. Starts as a copy of the input, and is rewritten in place by `clean`.
source_text: String,
/// Replacements queued by the `Visit` methods, applied to `source_text` by `clean`.
replacements: Vec<Replacement>,
}

/// A single queued edit: the source text covered by `span` is to be replaced with `text`.
struct Replacement {
/// Range of the source text to replace (its `start` and `end` are used as byte offsets).
span: Span,
/// Text to put in place of the original range.
text: String,
}

impl SourceCleaner {
    /// Create a cleaner holding its own copy of `source_text`, with no replacements queued.
    fn new(source_text: &str) -> Self {
        Self { replacements: Vec::new(), source_text: String::from(source_text) }
    }

    /// Rewrite `self.source_text` so that the lexer can lex it on its own without errors.
    ///
    /// Steps:
    /// 1. Parse the current source text (must parse without errors).
    /// 2. Visit the AST, queueing a replacement for each node the `Visit` impl handles.
    /// 3. Apply the queued replacements to `self.source_text`.
    /// 4. Run the lexer over the result and check it reports no errors.
    ///
    /// # Panics
    /// Panics if the parser reports any errors, or if the lexer reports any errors
    /// on the cleaned text.
    fn clean(&mut self, source_type: SourceType, allocator: &Allocator) {
        // Parse a separate copy of the text (presumably so the AST does not keep `self` borrowed
        // while `self` is mutated during the visit below).
        let original = self.source_text.clone();
        let parsed = Parser::new(allocator, &original, source_type).parse();
        assert!(parsed.errors.is_empty());

        // Collect the replacements by walking the AST
        self.visit_program(&parsed.program);

        // Apply replacements from last to first, so that edits never shift the offsets
        // of the spans which are still waiting to be applied
        self.replacements.sort_unstable_by_key(|entry| entry.span);
        for Replacement { span, text } in self.replacements.iter().rev() {
            let range = span.start as usize..span.end as usize;
            self.source_text.replace_range(range, text);
        }

        // Drive the lexer to the end of the cleaned text, and make sure it was error-free
        let mut lexer = Lexer::new_for_benchmarks(allocator, &self.source_text, source_type);
        loop {
            if lexer.next_token().kind == Kind::Eof {
                break;
            }
        }
        assert!(lexer.errors().is_empty());
    }

    /// Queue replacement of the source text covered by `span` with `text`.
    /// Nothing is changed until `clean` applies the queue.
    fn replace(&mut self, span: Span, text: String) {
        let entry = Replacement { span, text };
        self.replacements.push(entry);
    }
}

impl<'a> Visit<'a> for SourceCleaner {
    /// Queue replacement of a regexp's `/pattern/` with a string literal.
    ///
    /// The replaced span starts at the literal's start and covers the pattern plus 2 more bytes
    /// (the two `/` delimiters), so anything after the closing delimiter is left untouched.
    fn visit_reg_exp_literal(&mut self, regexp: &RegExpLiteral<'a>) {
        let RegExpPattern::Raw(pattern) = regexp.regex.pattern else { unreachable!() };
        let len = u32::try_from(pattern.len()).unwrap() + 2;
        self.replace(Span::sized(regexp.span.start, len), convert_to_string(pattern));
    }

    /// Queue replacement of a whole template literal with a string literal.
    ///
    /// The string's content is the literal's source text with 1 byte trimmed from each end
    /// of its span, and with every `\n` turned into a space.
    fn visit_template_literal(&mut self, lit: &TemplateLiteral<'a>) {
        let inner = lit.span.shrink(1).source_text(&self.source_text);
        let replacement = convert_to_string(inner).replace('\n', " ");
        self.replace(lit.span, replacement);
    }

    /// Queue replacement of a JSX text node with a string literal.
    ///
    /// The string's content is the node's source text, with every `\n` turned into a space.
    fn visit_jsx_text(&mut self, jsx_text: &JSXText<'a>) {
        let raw = jsx_text.span.source_text(&self.source_text);
        let replacement = convert_to_string(raw).replace('\n', " ");
        self.replace(jsx_text.span, replacement);
    }
}

/// Wrap `text` in quotes, producing something shaped like a string literal.
///
/// The delimiter is whichever of `'` and `"` occurs less often in `text` (`'` on a tie).
/// Every occurrence of the delimiter inside `text` is swapped for the other quote character,
/// so the delimiter only appears at the very start and very end of the result.
#[expect(clippy::naive_bytecount)]
fn convert_to_string(text: &str) -> String {
    let occurrences = |needle: u8| text.as_bytes().iter().filter(|&&b| b == needle).count();

    let (delimiter, substitute) =
        if occurrences(b'\'') <= occurrences(b'"') { ('\'', '"') } else { ('"', '\'') };

    // Both quote characters are 1 byte, so the output is exactly 2 bytes longer than the input
    let mut quoted = String::with_capacity(text.len() + 2);
    quoted.push(delimiter);
    quoted.extend(text.chars().map(|c| if c == delimiter { substitute } else { c }));
    quoted.push(delimiter);
    quoted
}
Loading