Skip to content

Commit ae9cc71

Browse files
committed
Remove unnecessary check when reading keywords
1 parent 5305486 commit ae9cc71

File tree

3 files changed

+98
-51
lines changed

3 files changed

+98
-51
lines changed

crates/swc_ecma_lexer/src/common/lexer/mod.rs

Lines changed: 56 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2110,20 +2110,16 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
21102110
}
21112111
}
21122112

2113-
/// This can be used if there's no keyword starting with the first
2114-
/// character.
2115-
fn read_word_with(
2113+
fn read_keyword_with(
21162114
&mut self,
21172115
convert: &dyn Fn(&str) -> Option<Self::Token>,
21182116
) -> LexResult<Option<Self::Token>> {
21192117
debug_assert!(self.cur().is_some());
21202118

21212119
let start = self.cur_pos();
2122-
let (word, has_escape) = self.read_word_as_str_with(|l, s, _, can_be_known| {
2123-
if can_be_known {
2124-
if let Some(word) = convert(s) {
2125-
return word;
2126-
}
2120+
let (word, has_escape) = self.read_keyword_as_str_with(|l, s, _, _| {
2121+
if let Some(word) = convert(s) {
2122+
return word;
21272123
}
21282124
let atom = l.atom(s);
21292125
Self::Token::unknown_ident(atom, l)
@@ -2133,14 +2129,65 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
21332129
// 'await' and 'yield' may have the semantics of a reserved word, which means the lexer
21342130
// should know the context or the parser should handle this error. Our approach to this
21352131
// problem is the former one.
2136-
21372132
if has_escape && word.is_reserved(self.ctx()) {
21382133
let word = word.into_atom(self).unwrap();
21392134
self.error(start, SyntaxError::EscapeInReservedWord { word })?
21402135
} else {
21412136
Ok(Some(word))
21422137
}
21432138
}
2139+
2140+
/// This is a faster variant of [`Lexer::read_word_as_str_with`] for
2141+
/// reading keywords. The caller must ensure that the first byte is valid
2142+
/// ASCII.
2143+
fn read_keyword_as_str_with<F, Ret>(&mut self, convert: F) -> LexResult<(Ret, bool)>
2144+
where
2145+
F: FnOnce(&mut Self, &str, bool, bool) -> Ret,
2146+
{
2147+
let slice_start = self.cur_pos();
2148+
let has_escape = false;
2149+
2150+
// Fast path: try to scan ASCII identifier using byte_search
2151+
// Performance optimization: the first byte is not checked here; it is assumed to be ASCII
2152+
// Advance past first byte
2153+
self.bump();
2154+
2155+
// Use byte_search to quickly scan to end of ASCII identifier
2156+
let next_byte = byte_search! {
2157+
lexer: self,
2158+
table: NOT_ASCII_ID_CONTINUE_TABLE,
2159+
handle_eof: {
2160+
// Reached EOF, entire remainder is identifier
2161+
let end = self.cur_pos();
2162+
let s = unsafe {
2163+
// Safety: slice_start and end are valid positions because we got them from
2164+
// `self.input`
2165+
self.input_slice(slice_start, end)
2166+
};
2167+
2168+
return Ok((convert(self, s, false, true), false));
2169+
},
2170+
};
2171+
2172+
// Check if we hit end of identifier or need to fall back to slow path
2173+
if !next_byte.is_ascii() {
2174+
// Hit Unicode character, fall back to slow path from current position
2175+
self.read_word_as_str_with_slow_path(convert, slice_start, has_escape, true)
2176+
} else if next_byte == b'\\' {
2177+
// Hit escape sequence, fall back to slow path from current position
2178+
self.read_word_as_str_with_slow_path(convert, slice_start, has_escape, true)
2179+
} else {
2180+
// Hit end of identifier (non-continue ASCII char)
2181+
let end = self.cur_pos();
2182+
let s = unsafe {
2183+
// Safety: slice_start and end are valid positions because we got them from
2184+
// `self.input`
2185+
self.input_slice(slice_start, end)
2186+
};
2187+
2188+
return Ok((convert(self, s, has_escape, true), has_escape));
2189+
}
2190+
}
21442191
}
21452192

21462193
pub fn pos_span(p: BytePos) -> Span {

crates/swc_ecma_lexer/src/lexer/table.rs

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ const ERR: ByteHandler = Some(|lexer| {
6464
const IDN: ByteHandler = Some(|lexer| lexer.read_ident_unknown().map(Some));
6565

6666
const L_A: ByteHandler = Some(|lexer| {
67-
lexer.read_word_with(&|s| match s {
67+
lexer.read_keyword_with(&|s| match s {
6868
"abstract" => Some(Token::Word(Word::Ident(IdentLike::Known(
6969
KnownIdent::Abstract,
7070
)))),
@@ -88,7 +88,7 @@ const L_A: ByteHandler = Some(|lexer| {
8888
});
8989

9090
const L_B: ByteHandler = Some(|lexer| {
91-
lexer.read_word_with(&|s| match s {
91+
lexer.read_keyword_with(&|s| match s {
9292
"break" => Some(Token::Word(Word::Keyword(Keyword::Break))),
9393
"boolean" => Some(Token::Word(Word::Ident(IdentLike::Known(
9494
KnownIdent::Boolean,
@@ -101,7 +101,7 @@ const L_B: ByteHandler = Some(|lexer| {
101101
});
102102

103103
const L_C: ByteHandler = Some(|lexer| {
104-
lexer.read_word_with(&|s| match s {
104+
lexer.read_keyword_with(&|s| match s {
105105
"case" => Some(Token::Word(Word::Keyword(Keyword::Case))),
106106
"catch" => Some(Token::Word(Word::Keyword(Keyword::Catch))),
107107
"class" => Some(Token::Word(Word::Keyword(Keyword::Class))),
@@ -112,7 +112,7 @@ const L_C: ByteHandler = Some(|lexer| {
112112
});
113113

114114
const L_D: ByteHandler = Some(|lexer| {
115-
lexer.read_word_with(&|s| match s {
115+
lexer.read_keyword_with(&|s| match s {
116116
"debugger" => Some(Token::Word(Word::Keyword(Keyword::Debugger))),
117117
"default" => Some(Token::Word(Word::Keyword(Keyword::Default_))),
118118
"delete" => Some(Token::Word(Word::Keyword(Keyword::Delete))),
@@ -125,7 +125,7 @@ const L_D: ByteHandler = Some(|lexer| {
125125
});
126126

127127
const L_E: ByteHandler = Some(|lexer| {
128-
lexer.read_word_with(&|s| match s {
128+
lexer.read_keyword_with(&|s| match s {
129129
"else" => Some(Token::Word(Word::Keyword(Keyword::Else))),
130130
"enum" => Some(Token::Word(Word::Ident(IdentLike::Known(KnownIdent::Enum)))),
131131
"export" => Some(Token::Word(Word::Keyword(Keyword::Export))),
@@ -135,7 +135,7 @@ const L_E: ByteHandler = Some(|lexer| {
135135
});
136136

137137
const L_F: ByteHandler = Some(|lexer| {
138-
lexer.read_word_with(&|s| match s {
138+
lexer.read_keyword_with(&|s| match s {
139139
"false" => Some(Token::Word(Word::False)),
140140
"finally" => Some(Token::Word(Word::Keyword(Keyword::Finally))),
141141
"for" => Some(Token::Word(Word::Keyword(Keyword::For))),
@@ -146,7 +146,7 @@ const L_F: ByteHandler = Some(|lexer| {
146146
});
147147

148148
const L_G: ByteHandler = Some(|lexer| {
149-
lexer.read_word_with(&|s| match s {
149+
lexer.read_keyword_with(&|s| match s {
150150
"global" => Some(Token::Word(Word::Ident(IdentLike::Known(
151151
KnownIdent::Global,
152152
)))),
@@ -158,7 +158,7 @@ const L_G: ByteHandler = Some(|lexer| {
158158
const L_H: ByteHandler = IDN;
159159

160160
const L_I: ByteHandler = Some(|lexer| {
161-
lexer.read_word_with(&|s| match s {
161+
lexer.read_keyword_with(&|s| match s {
162162
"if" => Some(Token::Word(Word::Keyword(Keyword::If))),
163163
"import" => Some(Token::Word(Word::Keyword(Keyword::Import))),
164164
"in" => Some(Token::Word(Word::Keyword(Keyword::In))),
@@ -183,7 +183,7 @@ const L_I: ByteHandler = Some(|lexer| {
183183
const L_J: ByteHandler = IDN;
184184

185185
const L_K: ByteHandler = Some(|lexer| {
186-
lexer.read_word_with(&|s| match s {
186+
lexer.read_keyword_with(&|s| match s {
187187
"keyof" => Some(Token::Word(Word::Ident(IdentLike::Known(
188188
KnownIdent::Keyof,
189189
)))),
@@ -192,21 +192,21 @@ const L_K: ByteHandler = Some(|lexer| {
192192
});
193193

194194
const L_L: ByteHandler = Some(|lexer| {
195-
lexer.read_word_with(&|s| match s {
195+
lexer.read_keyword_with(&|s| match s {
196196
"let" => Some(Token::Word(Word::Keyword(Keyword::Let))),
197197
_ => None,
198198
})
199199
});
200200

201201
const L_M: ByteHandler = Some(|lexer| {
202-
lexer.read_word_with(&|s| match s {
202+
lexer.read_keyword_with(&|s| match s {
203203
"meta" => Some(Token::Word(Word::Ident(IdentLike::Known(KnownIdent::Meta)))),
204204
_ => None,
205205
})
206206
});
207207

208208
const L_N: ByteHandler = Some(|lexer| {
209-
lexer.read_word_with(&|s| match s {
209+
lexer.read_keyword_with(&|s| match s {
210210
"new" => Some(Token::Word(Word::Keyword(Keyword::New))),
211211
"null" => Some(Token::Word(Word::Null)),
212212
"number" => Some(Token::Word(Word::Ident(IdentLike::Known(
@@ -223,7 +223,7 @@ const L_N: ByteHandler = Some(|lexer| {
223223
});
224224

225225
const L_O: ByteHandler = Some(|lexer| {
226-
lexer.read_word_with(&|s| match s {
226+
lexer.read_keyword_with(&|s| match s {
227227
"of" => Some(Token::Word(Word::Ident(IdentLike::Known(KnownIdent::Of)))),
228228
"object" => Some(Token::Word(Word::Ident(IdentLike::Known(
229229
KnownIdent::Object,
@@ -233,7 +233,7 @@ const L_O: ByteHandler = Some(|lexer| {
233233
});
234234

235235
const L_P: ByteHandler = Some(|lexer| {
236-
lexer.read_word_with(&|s| match s {
236+
lexer.read_keyword_with(&|s| match s {
237237
"public" => Some(Token::Word(Word::Ident(IdentLike::Known(
238238
KnownIdent::Public,
239239
)))),
@@ -253,7 +253,7 @@ const L_P: ByteHandler = Some(|lexer| {
253253
const L_Q: ByteHandler = IDN;
254254

255255
const L_R: ByteHandler = Some(|lexer| {
256-
lexer.read_word_with(&|s| match s {
256+
lexer.read_keyword_with(&|s| match s {
257257
"return" => Some(Token::Word(Word::Keyword(Keyword::Return))),
258258
"readonly" => Some(Token::Word(Word::Ident(IdentLike::Known(
259259
KnownIdent::Readonly,
@@ -266,7 +266,7 @@ const L_R: ByteHandler = Some(|lexer| {
266266
});
267267

268268
const L_S: ByteHandler = Some(|lexer| {
269-
lexer.read_word_with(&|s| match s {
269+
lexer.read_keyword_with(&|s| match s {
270270
"super" => Some(Token::Word(Word::Keyword(Keyword::Super))),
271271
"static" => Some(Token::Word(Word::Ident(IdentLike::Known(
272272
KnownIdent::Static,
@@ -287,7 +287,7 @@ const L_S: ByteHandler = Some(|lexer| {
287287
});
288288

289289
const L_T: ByteHandler = Some(|lexer| {
290-
lexer.read_word_with(&|s| match s {
290+
lexer.read_keyword_with(&|s| match s {
291291
"this" => Some(Token::Word(Word::Keyword(Keyword::This))),
292292
"throw" => Some(Token::Word(Word::Keyword(Keyword::Throw))),
293293
"true" => Some(Token::Word(Word::True)),
@@ -302,7 +302,7 @@ const L_T: ByteHandler = Some(|lexer| {
302302
});
303303

304304
const L_U: ByteHandler = Some(|lexer| {
305-
lexer.read_word_with(&|s| match s {
305+
lexer.read_keyword_with(&|s| match s {
306306
"using" => Some(Token::Word(Word::Ident(IdentLike::Known(
307307
KnownIdent::Using,
308308
)))),
@@ -320,15 +320,15 @@ const L_U: ByteHandler = Some(|lexer| {
320320
});
321321

322322
const L_V: ByteHandler = Some(|lexer| {
323-
lexer.read_word_with(&|s| match s {
323+
lexer.read_keyword_with(&|s| match s {
324324
"var" => Some(Token::Word(Word::Keyword(Keyword::Var))),
325325
"void" => Some(Token::Word(Word::Keyword(Keyword::Void))),
326326
_ => None,
327327
})
328328
});
329329

330330
const L_W: ByteHandler = Some(|lexer| {
331-
lexer.read_word_with(&|s| match s {
331+
lexer.read_keyword_with(&|s| match s {
332332
"while" => Some(Token::Word(Word::Keyword(Keyword::While))),
333333
"with" => Some(Token::Word(Word::Keyword(Keyword::With))),
334334
_ => None,
@@ -338,7 +338,7 @@ const L_W: ByteHandler = Some(|lexer| {
338338
const L_X: ByteHandler = IDN;
339339

340340
const L_Y: ByteHandler = Some(|lexer| {
341-
lexer.read_word_with(&|s| match s {
341+
lexer.read_keyword_with(&|s| match s {
342342
"yield" => Some(Token::Word(Word::Keyword(Keyword::Yield))),
343343
_ => None,
344344
})

0 commit comments

Comments
 (0)