Skip to content

Commit 1aed99b

Browse files
committed
perf(transformer/jsx): use memchr for parsing JSX pragma comments (#11001)
Use `memchr` to find `@` when parsing JSX pragmas from comments. This wins back most (but not all) of the perf loss from #10983 on the `antd.js` benchmark, while preserving the perf gain from #10983 on the `cal.com.tsx` benchmark. Interestingly, using `memchr` to search for just `@`, and then separately checking that the next 3 bytes are `jsx`, is measurably faster than using `memchr::memmem::Finder` to search for `@jsx`.
1 parent 6540f44 commit 1aed99b

3 files changed

Lines changed: 23 additions & 11 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/oxc_transformer/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ compact_str = { workspace = true }
4040
cow-utils = { workspace = true }
4141
indexmap = { workspace = true }
4242
itoa = { workspace = true }
43+
memchr = { workspace = true }
4344
rustc-hash = { workspace = true }
4445
serde = { workspace = true, features = ["derive"] }
4546
serde_json = { workspace = true }

crates/oxc_transformer/src/jsx/comments.rs

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
use std::borrow::Cow;
22

3+
use memchr::memchr;
4+
35
use oxc_ast::Comment;
46

57
use crate::{JsxOptions, JsxRuntime, TransformCtx, TypeScriptOptions};
@@ -96,20 +98,26 @@ enum PragmaType {
9698
fn find_jsx_pragma(mut comment_str: &str) -> Option<(PragmaType, &str, &str)> {
9799
let pragma_type;
98100
loop {
99-
// Search for `@jsx`.
100-
let mut at_sign_index = None;
101-
for (index, next4) in comment_str.as_bytes().windows(4).enumerate() {
102-
if next4 == b"@jsx" {
103-
at_sign_index = Some(index);
104-
break;
105-
}
101+
// Search for `@`.
102+
// Note: Using `memchr::memmem::Finder` to search for `@jsx` is slower than only using `memchr`
103+
// to find `@` characters, and then checking if `@` is followed by `jsx` separately.
104+
let at_sign_index = memchr(b'@', comment_str.as_bytes())?;
105+
106+
// Check `@` is start of `@jsx`.
107+
// Note: Checking 4 bytes including leading `@` is faster than checking the 3 bytes after `@`,
108+
// because 4 bytes is a `u32`.
109+
let next4 = comment_str.as_bytes().get(at_sign_index..at_sign_index + 4)?;
110+
if next4 != b"@jsx" {
111+
// Not `@jsx`. Trim off up to and including `@` and search again.
112+
// SAFETY: Byte at `at_sign_index` is `@`, so `at_sign_index + 1` is either within string
113+
// or end of string, and on a UTF-8 char boundary.
114+
comment_str = unsafe { comment_str.get_unchecked(at_sign_index + 1..) };
115+
continue;
106116
}
107-
// Exit if not found
108-
let at_sign_index = at_sign_index?;
109117

110-
// Trim `@jsx` from start of `comment_str`.
118+
// Trim `@jsx` and everything before it from start of `comment_str`.
111119
// SAFETY: 4 bytes starting at `at_sign_index` are `@jsx`, so `at_sign_index + 4` is within string
112-
// or end of string, and must be on a UTF-8 character boundary
120+
// or end of string, and must be on a UTF-8 character boundary.
113121
comment_str = unsafe { comment_str.get_unchecked(at_sign_index + 4..) };
114122

115123
// Get rest of keyword e.g. `Runtime` in `@jsxRuntime`
@@ -230,6 +238,8 @@ mod tests {
230238
("@jsxX @jsx h @jsxX", &[(PragmaType::Jsx, "h")]),
231239
("@jsxMoon @jsx h @jsxMoon", &[(PragmaType::Jsx, "h")]),
232240
("@jsx @jsx h", &[(PragmaType::Jsx, "@jsx")]),
241+
// Multiple `@` signs
242+
("@@@@@jsx h", &[(PragmaType::Jsx, "h")]),
233243
];
234244

235245
let prefixes = ["", " ", "\n\n", "*\n* "];

0 commit comments

Comments
 (0)