Skip to content

Commit 5ea5f77

Browse files
committed
Fix lexer & parser
1 parent 38132e0 commit 5ea5f77

File tree

3 files changed

+44
-94
lines changed

3 files changed

+44
-94
lines changed

crates/swc_ecma_lexer/src/common/lexer/mod.rs

Lines changed: 41 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -1112,26 +1112,31 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
11121112
op(self, &mut buf)
11131113
}
11141114

1115-
fn read_unicode_escape(&mut self) -> LexResult<Vec<Char>> {
1115+
fn read_unicode_escape(&mut self) -> LexResult<Char> {
11161116
debug_assert_eq!(self.cur(), Some('u'));
11171117

1118-
let mut chars = Vec::with_capacity(4);
1119-
let mut is_curly = false;
1120-
11211118
self.bump(); // 'u'
11221119

1123-
if self.eat(b'{') {
1124-
is_curly = true;
1125-
}
1120+
let is_curly = self.eat(b'{');
11261121

1127-
let state = self.input().cur_pos();
11281122
let c = match self.read_int_u32::<16>(if is_curly { 0 } else { 4 }) {
11291123
Ok(Some(val)) => {
1130-
if 0x0010_ffff >= val {
1131-
char::from_u32(val)
1124+
if val <= 0x0010_ffff {
1125+
char::from_u32(val).ok_or_else(|| {
1126+
let start = self.cur_pos();
1127+
crate::error::Error::new(
1128+
pos_span(start),
1129+
SyntaxError::BadCharacterEscapeSequence {
1130+
expected: if is_curly {
1131+
"1-6 hex characters in the range 0 to 10FFFF."
1132+
} else {
1133+
"4 hex characters"
1134+
},
1135+
},
1136+
)
1137+
})?
11321138
} else {
11331139
let start = self.cur_pos();
1134-
11351140
self.error(
11361141
start,
11371142
SyntaxError::BadCharacterEscapeSequence {
@@ -1146,7 +1151,6 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
11461151
}
11471152
_ => {
11481153
let start = self.cur_pos();
1149-
11501154
self.error(
11511155
start,
11521156
SyntaxError::BadCharacterEscapeSequence {
@@ -1160,54 +1164,12 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
11601164
}
11611165
};
11621166

1163-
match c {
1164-
Some(c) => {
1165-
chars.push(c.into());
1166-
}
1167-
_ => {
1168-
unsafe {
1169-
// Safety: state is valid position because we got it from cur_pos()
1170-
self.input_mut().reset_to(state);
1171-
}
1172-
1173-
chars.push(Char::from('\\'));
1174-
chars.push(Char::from('u'));
1175-
1176-
if is_curly {
1177-
chars.push(Char::from('{'));
1178-
1179-
for _ in 0..6 {
1180-
if let Some(c) = self.input().cur() {
1181-
if c == '}' {
1182-
break;
1183-
}
1184-
1185-
self.bump();
1186-
1187-
chars.push(Char::from(c));
1188-
} else {
1189-
break;
1190-
}
1191-
}
1192-
1193-
chars.push(Char::from('}'));
1194-
} else {
1195-
for _ in 0..4 {
1196-
if let Some(c) = self.input().cur() {
1197-
self.bump();
1198-
1199-
chars.push(Char::from(c));
1200-
}
1201-
}
1202-
}
1203-
}
1204-
}
1205-
12061167
if is_curly && !self.eat(b'}') {
1207-
self.error(state, SyntaxError::InvalidUnicodeEscape)?
1168+
let start = self.cur_pos();
1169+
self.error(start, SyntaxError::InvalidUnicodeEscape)?
12081170
}
12091171

1210-
Ok(chars)
1172+
Ok(c.into())
12111173
}
12121174

12131175
#[cold]
@@ -1337,9 +1299,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
13371299
match self.read_escaped_char(true) {
13381300
Ok(Some(chars)) => {
13391301
if let Ok(ref mut cooked) = cooked {
1340-
for c in chars {
1341-
cooked.extend(c);
1342-
}
1302+
cooked.extend(chars);
13431303
}
13441304
}
13451305
Ok(None) => {}
@@ -1358,7 +1318,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
13581318
/// Read an escaped character for string literal.
13591319
///
13601320
/// In template literal, we should preserve raw string.
1361-
fn read_escaped_char(&mut self, in_template: bool) -> LexResult<Option<Vec<Char>>> {
1321+
fn read_escaped_char(&mut self, in_template: bool) -> LexResult<Option<Char>> {
13621322
debug_assert_eq!(self.cur(), Some('\\'));
13631323

13641324
let start = self.cur_pos();
@@ -1396,7 +1356,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
13961356
self.bump(); // 'x'
13971357

13981358
match self.read_int_u32::<16>(2)? {
1399-
Some(val) => return Ok(Some(vec![Char::from(val)])),
1359+
Some(val) => return Ok(Some(Char::from(val))),
14001360
None => self.error(
14011361
start,
14021362
SyntaxError::BadCharacterEscapeSequence {
@@ -1407,10 +1367,10 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
14071367
}
14081368

14091369
// read unicode escape sequences
1410-
'u' => match self.read_unicode_escape() {
1411-
Ok(chars) => return Ok(Some(chars)),
1412-
Err(err) => self.error(start, err.into_kind())?,
1413-
},
1370+
'u' => {
1371+
let c = self.read_unicode_escape()?;
1372+
return Ok(Some(c));
1373+
}
14141374

14151375
// octal escape sequences
14161376
'0'..='7' => {
@@ -1420,7 +1380,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
14201380
match self.cur() {
14211381
Some(next) if next.is_digit(8) => c,
14221382
// \0 is not an octal literal nor decimal literal.
1423-
_ => return Ok(Some(vec!['\u{0000}'.into()])),
1383+
_ => return Ok(Some('\u{0000}'.into())),
14241384
}
14251385
} else {
14261386
c
@@ -1447,23 +1407,23 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
14471407
.and_then(|value| value.checked_add(v as u8));
14481408
match new_val {
14491409
Some(val) => val,
1450-
None => return Ok(Some(vec![Char::from(value as char)])),
1410+
None => return Ok(Some(Char::from(value as char))),
14511411
}
14521412
} else {
14531413
value * 8 + v as u8
14541414
};
14551415

14561416
self.bump();
14571417
}
1458-
_ => return Ok(Some(vec![Char::from(value as u32)])),
1418+
_ => return Ok(Some(Char::from(value as u32))),
14591419
}
14601420
}};
14611421
}
14621422

14631423
one!(false);
14641424
one!(true);
14651425

1466-
return Ok(Some(vec![Char::from(value as char)]));
1426+
return Ok(Some(Char::from(value as char)));
14671427
}
14681428
_ => c,
14691429
};
@@ -1473,7 +1433,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
14731433
self.input_mut().bump();
14741434
}
14751435

1476-
Ok(Some(vec![c.into()]))
1436+
Ok(Some(c.into()))
14771437
}
14781438

14791439
/// Expects current char to be '/'
@@ -1688,23 +1648,19 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
16881648
}
16891649
}
16901650

1691-
let chars = l.read_unicode_escape()?;
1651+
let c = l.read_unicode_escape()?;
16921652

1693-
if let Some(c) = chars.first() {
1694-
let valid = if first {
1695-
c.is_ident_start()
1696-
} else {
1697-
c.is_ident_part()
1698-
};
1653+
let valid = if first {
1654+
c.is_ident_start()
1655+
} else {
1656+
c.is_ident_part()
1657+
};
16991658

1700-
if !valid {
1701-
l.emit_error(start, SyntaxError::InvalidIdentChar);
1702-
}
1659+
if !valid {
1660+
l.emit_error(start, SyntaxError::InvalidIdentChar);
17031661
}
17041662

1705-
for c in chars {
1706-
buf.extend(c);
1707-
}
1663+
buf.extend(c);
17081664

17091665
slice_start = l.cur_pos();
17101666
continue;
@@ -2063,9 +2019,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
20632019
}
20642020

20652021
if let Some(chars) = l.read_escaped_char(false)? {
2066-
for c in chars {
2067-
buf.extend(c);
2068-
}
2022+
buf.extend(chars);
20692023
}
20702024

20712025
slice_start = l.cur_pos();

crates/swc_ecma_parser/src/lexer/mod.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -405,9 +405,7 @@ impl Lexer<'_> {
405405
match self.read_escaped_char(true) {
406406
Ok(Some(chars)) => {
407407
if let Ok(ref mut cooked) = cooked {
408-
for c in chars {
409-
cooked.extend(c);
410-
}
408+
cooked.extend(chars);
411409
}
412410
}
413411
Ok(None) => {}

crates/swc_ecma_parser/src/lexer/state.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -521,13 +521,11 @@ impl Lexer<'_> {
521521
continue;
522522
}
523523
self.bump(); // bump 'u'
524-
let Ok(chars) = self.read_unicode_escape() else {
524+
let Ok(c) = self.read_unicode_escape() else {
525525
self.emit_error(self.cur_pos(), SyntaxError::InvalidUnicodeEscape);
526526
break;
527527
};
528-
for c in chars {
529-
v.extend(c);
530-
}
528+
v.extend(c);
531529
self.token_flags |= swc_ecma_lexer::lexer::TokenFlags::UNICODE;
532530
} else {
533531
break;

0 commit comments

Comments
 (0)