Skip to content

Commit d66867f

Browse files
Dunqingclaude
andcommitted
perf(formatter): optimize jsdoc formatting to reduce allocations and redundant work
- Hoist FormatOptions clone in format_embedded_js (1 clone instead of up to 4) - Pre-build type-formatter options once per comment instead of per tag - Cache type_name_comment() results in reorder_param_tags (1 parse vs 4 per tag) - Add single-group fast path in sort_tags_by_groups (skip Vec-of-Vec) - Replace FxHashSet<usize> with SmallVec<[usize; 4]> for import indices - Add lazy allocation in normalize_markdown_emphasis via dry-run scan Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent a830173 commit d66867f

4 files changed

Lines changed: 159 additions & 47 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/oxc_formatter/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ natord = { workspace = true }
3636
nodejs-built-in-modules = { workspace = true }
3737
phf = { workspace = true, features = ["macros"] }
3838
rustc-hash = { workspace = true }
39+
smallvec = { workspace = true }
3940
unicode-width = { workspace = true }
4041

4142
[dev-dependencies]

crates/oxc_formatter/src/formatter/jsdoc/normalize.rs

Lines changed: 84 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -41,14 +41,19 @@ pub fn normalize_markdown_emphasis(text: &str) -> Cow<'_, str> {
4141
return Cow::Borrowed(text);
4242
}
4343

44+
// Read-only scan: check if any emphasis change would actually occur,
45+
// avoiding a heap allocation when `*` or `__` appear only inside code spans.
46+
if !emphasis_needs_change(text.as_bytes()) {
47+
return Cow::Borrowed(text);
48+
}
49+
4450
// Work with bytes directly — all significant chars (_, *, `, whitespace)
4551
// are ASCII single-byte, so byte-level mutation is safe and uses ~3x less
4652
// memory than Vec<char>.
4753
let mut bytes: Vec<u8> = text.as_bytes().to_vec();
4854
let len = bytes.len();
4955
let mut i = 0;
5056
let mut in_code = false;
51-
let mut changed = false;
5257

5358
// First pass: convert `__` → `**`
5459
while i < len {
@@ -64,7 +69,6 @@ pub fn normalize_markdown_emphasis(text: &str) -> Cow<'_, str> {
6469
if bytes[i] == b'_' && i + 1 < len && bytes[i + 1] == b'_' {
6570
bytes[i] = b'*';
6671
bytes[i + 1] = b'*';
67-
changed = true;
6872
i += 2;
6973
continue;
7074
}
@@ -119,7 +123,6 @@ pub fn normalize_markdown_emphasis(text: &str) -> Cow<'_, str> {
119123
if bytes[j] == b'*' && j > opener + 1 && !bytes[j - 1].is_ascii_whitespace() {
120124
bytes[opener] = b'_';
121125
bytes[j] = b'_';
122-
changed = true;
123126
i = j + 1;
124127
break;
125128
}
@@ -134,15 +137,89 @@ pub fn normalize_markdown_emphasis(text: &str) -> Cow<'_, str> {
134137
i += 1;
135138
}
136139

137-
// If no bytes were actually modified, return the original text without allocation.
138-
if !changed {
139-
return Cow::Borrowed(text);
140-
}
141140
// We only replaced ASCII bytes (_, *) with other ASCII bytes (*, _),
142141
// so UTF-8 validity is preserved.
143142
Cow::Owned(String::from_utf8(bytes).unwrap())
144143
}
145144

145+
/// Read-only scan that checks whether `normalize_markdown_emphasis` would
146+
/// actually change any bytes. Runs the same two-pass logic without mutation.
147+
fn emphasis_needs_change(bytes: &[u8]) -> bool {
148+
let len = bytes.len();
149+
let mut i = 0;
150+
let mut in_code = false;
151+
152+
// Pass 1: would any `__` outside code be converted to `**`?
153+
while i < len {
154+
if bytes[i] == b'`' {
155+
in_code = !in_code;
156+
i += 1;
157+
continue;
158+
}
159+
if in_code {
160+
i += 1;
161+
continue;
162+
}
163+
if bytes[i] == b'_' && i + 1 < len && bytes[i + 1] == b'_' {
164+
return true;
165+
}
166+
i += 1;
167+
}
168+
169+
// Pass 2: would any single `*text*` outside code be converted to `_text_`?
170+
// After pass 1, `__` would become `**`, so `**` sequences need to be skipped.
171+
// Since we're read-only, `__` is still `__` — but the mutation pass would have
172+
// converted it. We simulate that by also skipping `__` as if it were `**`.
173+
in_code = false;
174+
i = 0;
175+
while i < len {
176+
if bytes[i] == b'`' {
177+
in_code = !in_code;
178+
i += 1;
179+
continue;
180+
}
181+
if in_code {
182+
i += 1;
183+
continue;
184+
}
185+
// Skip `**` and `__` (which would become `**` after pass 1)
186+
if (bytes[i] == b'*' || bytes[i] == b'_') && i + 1 < len && bytes[i + 1] == bytes[i] {
187+
i += 2;
188+
continue;
189+
}
190+
// Single `*` — check for matching closing emphasis
191+
if bytes[i] == b'*' && i + 1 < len && !bytes[i + 1].is_ascii_whitespace() {
192+
let opener = i;
193+
let mut j = opener + 1;
194+
while j < len {
195+
if bytes[j] == b'`' {
196+
j += 1;
197+
while j < len && bytes[j] != b'`' {
198+
j += 1;
199+
}
200+
if j < len {
201+
j += 1;
202+
}
203+
continue;
204+
}
205+
if bytes[j] == b'*' && j + 1 < len && bytes[j + 1] == b'*' {
206+
j += 2;
207+
continue;
208+
}
209+
if bytes[j] == b'*' && j > opener + 1 && !bytes[j - 1].is_ascii_whitespace() {
210+
return true;
211+
}
212+
j += 1;
213+
}
214+
i = opener + 1;
215+
continue;
216+
}
217+
i += 1;
218+
}
219+
220+
false
221+
}
222+
146223
/// Capitalize the first ASCII lowercase letter of a string.
147224
/// Skips if the string starts with a backtick (inline code) or a URL.
148225
/// Handles `"- "` prefix iteratively: `"- - hello"` → `"- - Hello"` with a single allocation.

crates/oxc_formatter/src/formatter/jsdoc/serialize.rs

Lines changed: 73 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -220,41 +220,45 @@ fn reorder_param_tags(
220220
return;
221221
}
222222

223-
// Check that ALL @param tags have type annotations and names
224223
let param_tags = &effective_tags[param_start..param_end];
225-
let has_all_types_and_names = param_tags.iter().all(|(tag, _)| {
226-
let (type_part, name_part, _) = tag.type_name_comment();
227-
type_part.is_some() && name_part.is_some()
228-
});
229-
if !has_all_types_and_names {
230-
return; // Some params lack types or names — don't reorder
224+
225+
// Parse type_name_comment() once per tag, cache the results.
226+
// Each call does O(n) brace-counting, and we'd otherwise call it 4x per tag.
227+
let parsed: Vec<_> = param_tags
228+
.iter()
229+
.map(|(tag, _)| {
230+
let (type_part, name_part, _) = tag.type_name_comment();
231+
(type_part.is_some(), name_part.map(|n| n.parsed()))
232+
})
233+
.collect();
234+
235+
// Check that ALL @param tags have type annotations and names
236+
if !parsed.iter().all(|(has_type, name)| *has_type && name.is_some()) {
237+
return;
231238
}
232239

240+
// Extract the cached names (we verified all are Some above)
241+
let names: Vec<&str> = parsed.iter().map(|(_, name)| name.unwrap_or("")).collect();
242+
233243
// Extract function parameter names from the source text after the comment
234244
let fn_params = extract_function_params(comment, source_text);
235-
if fn_params.len() != param_tags.len() {
245+
if fn_params.len() != names.len() {
236246
return;
237247
}
238248

239249
// Already in order?
240-
if param_tags.iter().zip(fn_params.iter()).all(|((tag, _), p)| {
241-
let (_, name_part, _) = tag.type_name_comment();
242-
name_part.map_or("", |n| n.parsed()) == *p
243-
}) {
250+
if names.iter().zip(fn_params.iter()).all(|(name, p)| *name == *p) {
244251
return;
245252
}
246253

247254
// Check same set of names (lengths already verified equal, param lists are small)
248-
if !param_tags.iter().all(|(tag, _)| {
249-
let (_, name_part, _) = tag.type_name_comment();
250-
let name = name_part.map_or("", |n| n.parsed());
251-
fn_params.contains(&name)
252-
}) {
255+
if !names.iter().all(|name| fn_params.contains(name)) {
253256
return;
254257
}
255258

256-
// Sort @param tags by their position in the function signature
257-
effective_tags[param_start..param_end].sort_by_key(|(tag, _)| {
259+
// Sort @param tags by their position in the function signature.
260+
// Use sort_by_cached_key to call the key function once per element.
261+
effective_tags[param_start..param_end].sort_by_cached_key(|(tag, _)| {
258262
let (_, name_part, _) = tag.type_name_comment();
259263
let name = name_part.map_or("", |n| n.parsed());
260264
fn_params.iter().position(|p| *p == name).unwrap_or(usize::MAX)
@@ -616,23 +620,46 @@ fn sort_tags_by_groups<'a>(
616620
return Vec::new();
617621
}
618622

619-
// Split into groups at TAGS_GROUP_HEAD boundaries, but only when a
623+
// Build normalized list once (avoids calling normalize_tag_kind twice per tag).
624+
let mut normalized: Vec<(&oxc_jsdoc::parser::JSDocTag<'a>, &'a str)> =
625+
tags.iter().map(|tag| (tag, normalize_tag_kind(tag.kind.parsed()))).collect();
626+
627+
// Fast path: check if any group split is actually needed.
628+
// A split only occurs when a TAGS_GROUP_HEAD appears after a TAGS_GROUP_CONDITION.
629+
let mut needs_split = false;
630+
let mut seen_condition = false;
631+
for &(_, kind) in &normalized {
632+
if is_tags_group_condition(kind) {
633+
seen_condition = true;
634+
}
635+
if is_tags_group_head(kind) && seen_condition {
636+
needs_split = true;
637+
break;
638+
}
639+
}
640+
641+
if !needs_split {
642+
// Single group — sort in-place, no Vec-of-Vec overhead
643+
normalized.sort_by_key(|(_, kind)| tag_sort_priority(kind));
644+
return normalized;
645+
}
646+
647+
// Multi-group path: split at TAGS_GROUP_HEAD boundaries when a
620648
// TAGS_GROUP_CONDITION tag has been seen first (matching upstream behavior).
621649
let mut groups: Vec<Vec<(&oxc_jsdoc::parser::JSDocTag<'a>, &'a str)>> = Vec::new();
622650
let mut current_group: Vec<(&oxc_jsdoc::parser::JSDocTag<'a>, &'a str)> = Vec::new();
623651
let mut can_group_next_tags = false;
624652

625-
for tag in tags {
626-
let normalized_kind = normalize_tag_kind(tag.kind.parsed());
627-
if is_tags_group_head(normalized_kind) && can_group_next_tags && !current_group.is_empty() {
653+
for (tag, kind) in normalized {
654+
if is_tags_group_head(kind) && can_group_next_tags && !current_group.is_empty() {
628655
groups.push(current_group);
629656
current_group = Vec::new();
630657
can_group_next_tags = false;
631658
}
632-
if is_tags_group_condition(normalized_kind) {
659+
if is_tags_group_condition(kind) {
633660
can_group_next_tags = true;
634661
}
635-
current_group.push((tag, normalized_kind));
662+
current_group.push((tag, kind));
636663
}
637664
if !current_group.is_empty() {
638665
groups.push(current_group);
@@ -762,6 +789,10 @@ pub fn format_jsdoc_comment<'a>(
762789
// Reorder @param tags to match the function signature order
763790
reorder_param_tags(&mut effective_tags, comment, source_text);
764791

792+
// Pre-build FormatOptions for type formatting — avoids cloning the full
793+
// FormatOptions (which contains heap Vecs) per tag.
794+
let type_format_options = FormatOptions { jsdoc: None, ..format_options.clone() };
795+
765796
// Pre-process @import tags: merge by module, sort, format
766797
let (mut import_lines, parsed_import_indices) = process_import_tags(&effective_tags);
767798
let has_imports = !import_lines.is_empty();
@@ -860,7 +891,7 @@ pub fn format_jsdoc_comment<'a>(
860891
wrap_width,
861892
has_no_space_before_type,
862893
bracket_spacing,
863-
format_options,
894+
&type_format_options,
864895
external_callbacks,
865896
&mut content_lines,
866897
);
@@ -872,7 +903,7 @@ pub fn format_jsdoc_comment<'a>(
872903
wrap_width,
873904
has_no_space_before_type,
874905
bracket_spacing,
875-
format_options,
906+
&type_format_options,
876907
external_callbacks,
877908
&mut content_lines,
878909
);
@@ -1362,9 +1393,9 @@ pub(super) fn format_embedded_js(
13621393
let width = u16::try_from(print_width).unwrap_or(80).clamp(1, 320);
13631394
let line_width = LineWidth::try_from(width).unwrap();
13641395

1365-
// Build options from parent, overriding line_width and disabling JSDoc
1366-
// to prevent recursive formatting
1367-
let make_options = || FormatOptions { line_width, jsdoc: None, ..format_options.clone() };
1396+
// Clone once upfront — subsequent clones of base_options are cheap since
1397+
// the Vec fields (sort_imports, sort_tailwindcss) are already owned.
1398+
let base_options = FormatOptions { line_width, jsdoc: None, ..format_options.clone() };
13681399

13691400
// Try to parse and format with the given source type
13701401
let try_format = |code: &str, source_type: SourceType| -> Option<String> {
@@ -1374,7 +1405,7 @@ pub(super) fn format_embedded_js(
13741405
if ret.panicked || !ret.errors.is_empty() {
13751406
return None;
13761407
}
1377-
let mut formatted = Formatter::new(&allocator, make_options()).build(&ret.program);
1408+
let mut formatted = Formatter::new(&allocator, base_options.clone()).build(&ret.program);
13781409
truncate_trim_end(&mut formatted);
13791410
Some(formatted)
13801411
};
@@ -1393,6 +1424,10 @@ pub(super) fn format_embedded_js(
13931424
let trimmed = code.trim();
13941425
if trimmed.starts_with('{') {
13951426
let wrapped = format!("({trimmed})");
1427+
// Use TrailingCommas::None for object literals since JSON-like code
1428+
// shouldn't have trailing commas
1429+
let obj_options =
1430+
FormatOptions { trailing_commas: TrailingCommas::None, ..base_options.clone() };
13961431

13971432
let try_format_obj = |code: &str, source_type: SourceType| -> Option<String> {
13981433
let allocator = Allocator::default();
@@ -1402,10 +1437,7 @@ pub(super) fn format_embedded_js(
14021437
if ret.panicked || !ret.errors.is_empty() {
14031438
return None;
14041439
}
1405-
// Use TrailingCommas::None for object literals since JSON-like code
1406-
// shouldn't have trailing commas
1407-
let options = FormatOptions { trailing_commas: TrailingCommas::None, ..make_options() };
1408-
let formatted = Formatter::new(&allocator, options).build(&ret.program);
1440+
let formatted = Formatter::new(&allocator, obj_options.clone()).build(&ret.program);
14091441
let formatted = formatted.trim_end();
14101442
// Remove the wrapping parens and trailing semicolon
14111443
if let Some(inner) = formatted.strip_prefix('(')
@@ -1460,8 +1492,9 @@ fn format_type_via_formatter(type_str: &str, format_options: &FormatOptions) ->
14601492
let input = format!("type __t = {type_str};");
14611493

14621494
let allocator = Allocator::default();
1463-
let line_width = format_options.line_width;
1464-
let options = FormatOptions { line_width, jsdoc: None, ..format_options.clone() };
1495+
// The caller is expected to pass pre-built options with jsdoc: None.
1496+
// Clone is cheap here since the expensive Vec fields are already owned.
1497+
let options = format_options.clone();
14651498

14661499
let ret = Parser::new(&allocator, &input, SourceType::tsx())
14671500
.with_options(get_parse_options())
@@ -2468,9 +2501,9 @@ fn format_import_lines(import: &ImportInfo, content_lines: &mut Vec<String>) {
24682501
/// `@import` tags can fall through to `format_generic_tag()`).
24692502
fn process_import_tags(
24702503
tags: &[(&oxc_jsdoc::parser::JSDocTag<'_>, &str)],
2471-
) -> (Vec<String>, rustc_hash::FxHashSet<usize>) {
2504+
) -> (Vec<String>, smallvec::SmallVec<[usize; 4]>) {
24722505
let mut imports = Vec::new();
2473-
let mut parsed_indices = rustc_hash::FxHashSet::default();
2506+
let mut parsed_indices = smallvec::SmallVec::<[usize; 4]>::new();
24742507

24752508
for (idx, &(tag, kind)) in tags.iter().enumerate() {
24762509
if kind != "import" {
@@ -2479,7 +2512,7 @@ fn process_import_tags(
24792512
let comment = tag.comment().parsed();
24802513
if let Some(info) = parse_import_tag(&comment) {
24812514
imports.push(info);
2482-
parsed_indices.insert(idx);
2515+
parsed_indices.push(idx);
24832516
}
24842517
}
24852518

0 commit comments

Comments
 (0)