Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions crates/oxc_parser/src/js/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -286,8 +286,12 @@ impl<'a> ParserImpl<'a> {
let token = self.cur_token();
let src = self.cur_src();
let value = match token.kind {
Kind::Decimal | Kind::Binary | Kind::Octal | Kind::Hex => parse_int(src, token.kind),
Kind::Float | Kind::PositiveExponential | Kind::NegativeExponential => parse_float(src),
Kind::Decimal | Kind::Binary | Kind::Octal | Kind::Hex => {
parse_int(src, token.kind, token.has_separator())
}
Kind::Float | Kind::PositiveExponential | Kind::NegativeExponential => {
parse_float(src, token.has_separator())
}
_ => unreachable!(),
}
.map_err(|err| diagnostics::invalid_number(err, token.span()))?;
Expand Down
40 changes: 33 additions & 7 deletions crates/oxc_parser/src/lexer/number.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,35 @@ use std::borrow::Cow;

use super::kind::Kind;

// the string passed in has `_` removed from the lexer
pub fn parse_int(s: &str, kind: Kind) -> Result<f64, &'static str> {
pub fn parse_int(s: &str, kind: Kind, has_sep: bool) -> Result<f64, &'static str> {
let s = if has_sep { Cow::Owned(s.replace('_', "")) } else { Cow::Borrowed(s) };
let s = s.as_ref();
debug_assert!(!s.contains('_'));

// SAFETY: we just checked that `s` has no `_` characters
unsafe { parse_int_without_underscores_unchecked(s, kind) }
}

pub fn parse_float(s: &str, has_sep: bool) -> Result<f64, &'static str> {
let s = if has_sep { Cow::Owned(s.replace('_', "")) } else { Cow::Borrowed(s) };
debug_assert!(!s.contains('_'));

// SAFETY: we just checked that `s` has no `_` characters
unsafe { parse_float_without_underscores_unchecked(&s) }
}

/// # Safety
///
/// This function assumes that all `_` characters have been stripped from `s`.
/// Violating this assumption does _not_ cause UB. However, this function is
/// marked as unsafe to ensure consumers are aware of the assumption.
unsafe fn parse_int_without_underscores_unchecked(
s: &str,
kind: Kind,
) -> Result<f64, &'static str> {
if kind == Kind::Decimal {
return parse_float(s);
return parse_float_without_underscores_unchecked(s);
}
let s = if s.contains('_') { Cow::Owned(s.replace('_', "")) } else { Cow::Borrowed(s) };
let s = s.as_ref();
match kind {
Kind::Binary => Ok(parse_binary(&s[2..])),
Kind::Octal => {
Expand All @@ -28,8 +50,12 @@ pub fn parse_int(s: &str, kind: Kind) -> Result<f64, &'static str> {
}
}

pub fn parse_float(s: &str) -> Result<f64, &'static str> {
let s = if s.contains('_') { Cow::Owned(s.replace('_', "")) } else { Cow::Borrowed(s) };
/// # Safety
///
/// This function assumes that all `_` characters have been stripped from `s`.
/// Violating this assumption does _not_ cause UB. However, this function is
/// marked as unsafe to ensure consumers are aware of the assumption.
unsafe fn parse_float_without_underscores_unchecked(s: &str) -> Result<f64, &'static str> {
s.parse::<f64>().map_err(|_| "invalid float")
}

Expand Down
10 changes: 10 additions & 0 deletions crates/oxc_parser/src/lexer/numeric.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ impl<'a> Lexer<'a> {
match c {
'_' => {
self.consume_char();
// NOTE: it looks invalid numeric tokens are still parsed.
// This seems to be a waste. It also requires us to put this
// call here instead of after we ensure the next character
// is a number character
self.token.set_has_separator();
if self.peek().is_some_and(|c| kind.matches_number_char(c)) {
self.consume_char();
} else {
Expand Down Expand Up @@ -134,6 +139,11 @@ impl<'a> Lexer<'a> {
match c {
'_' => {
self.consume_char();
// NOTE: it looks invalid numeric tokens are still parsed.
// This seems to be a waste. It also requires us to put this
// call here instead of after we ensure the next character
// is an ASCII digit
self.token.set_has_separator();
if self.peek().is_some_and(|c| c.is_ascii_digit()) {
self.consume_char();
} else {
Expand Down
19 changes: 18 additions & 1 deletion crates/oxc_parser/src/lexer/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,16 @@ pub struct Token {
/// [Lexer::escaped_templates]: [super::Lexer::escaped_templates]
pub escaped: bool,

/// True if for numeric literal tokens that contain separator characters (`_`).
///
/// Numeric literals are defined in Section 12.9.3 of the ECMAScript
/// standard and include [`Kind::Decimal`], [`Kind::Binary`],
/// [`Kind::Octal`], [`Kind::Hex`], etc.
has_separator: bool,

// Padding to fill to 16 bytes.
// This makes copying a `Token` 1 x xmmword load & store, rather than 1 x dword + 1 x qword
// and `Token::default()` is 1 x xmmword store, rather than 1 x dword + 1 x qword.
_padding1: u8,
_padding2: u32,
}

Expand All @@ -50,4 +56,15 @@ impl Token {
pub fn escaped(&self) -> bool {
self.escaped
}

#[inline]
pub fn has_separator(&self) -> bool {
debug_assert!(!self.has_separator || self.kind.is_number());
self.has_separator
}

pub(crate) fn set_has_separator(&mut self) {
debug_assert!(!self.has_separator || self.kind.is_number() || self.kind == Kind::default());
self.has_separator = true;
}
}