Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions src/Esprima/Character.cs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,15 @@ internal static bool IsIdentifierStart(string s, int index)
return IsIdentifierStartUnicodeCategory(CharUnicodeInfo.GetUnicodeCategory(s, index));
}

// Code-point overload: decides whether the code point `cp` is allowed as the first
// character of an identifier, based on its Unicode general category.
// The category lookup is compiled only when NETSTANDARD2_1_OR_GREATER is defined;
// in every other build this overload accepts any code point.
internal static bool IsIdentifierStart(int cp)
{
#if !NETSTANDARD2_1_OR_GREATER
    // No category lookup is compiled into this build, so nothing is rejected here.
    return true;
#else
    var category = CharUnicodeInfo.GetUnicodeCategory(cp);
    return IsIdentifierStartUnicodeCategory(category);
#endif
}

internal static bool IsIdentifierPart(char ch)
{
return (_characterData[ch] & (byte) CharacterMask.IdentifierPart) != 0;
Expand All @@ -58,6 +67,15 @@ internal static bool IsIdentifierPart(string s, int index)
return IsIdentifierPartUnicodeCategory(CharUnicodeInfo.GetUnicodeCategory(s, index));
}

// Code-point overload: decides whether the code point `cp` is allowed inside an
// identifier (after its first character), based on its Unicode general category.
// The category lookup is compiled only when NETSTANDARD2_1_OR_GREATER is defined;
// in every other build this overload accepts any code point.
internal static bool IsIdentifierPart(int cp)
{
#if !NETSTANDARD2_1_OR_GREATER
    // No category lookup is compiled into this build, so nothing is rejected here.
    return true;
#else
    var category = CharUnicodeInfo.GetUnicodeCategory(cp);
    return IsIdentifierPartUnicodeCategory(category);
#endif
}

// https://tc39.github.io/ecma262/#sec-literals-numeric-literals

// Delegate created once from the IsDecimalDigit method group and kept in a static readonly field.
internal static readonly Func<char, bool> IsDecimalDigitFunc = IsDecimalDigit;
Expand Down
71 changes: 37 additions & 34 deletions src/Esprima/Scanner.cs
Original file line number Diff line number Diff line change
Expand Up @@ -56,30 +56,6 @@ public sealed partial class Scanner

internal StringPool _stringPool;

// Converts a single hexadecimal digit character to its numeric value (0..15).
//
// Accepts '0'-'9', 'a'-'f' and 'A'-'F'. Any other character yields 0, so callers
// that need to distinguish an invalid digit from '0' must validate the character
// first (e.g. with Character.IsHexDigit).
//
// The previous range checks used 'h'/'H' as upper bounds, which mapped 'g', 'h',
// 'G' and 'H' to 16/17, and let characters below '0' produce negative values.
private static int HexValue(char ch)
{
    return ch switch
    {
        >= '0' and <= '9' => ch - '0',
        >= 'a' and <= 'f' => ch - 'a' + 10,
        >= 'A' and <= 'F' => ch - 'A' + 10,
        _ => 0
    };
}

private static int OctalValue(char ch)
{
return ch - '0';
Expand Down Expand Up @@ -566,7 +542,7 @@ private bool ScanHexEscape(char prefix, out char result)
var d = _source[_index];
if (Character.IsHexDigit(d))
{
code = code * 16 + HexValue(d);
code = code * 16 + HexConverter.FromChar(d);
_index++;
}
else
Expand All @@ -586,10 +562,10 @@ private bool ScanHexEscape(char prefix, out char result)
return true;
}

private string? TryToScanUnicodeCodePointEscape()
private string? TryScanUnicodeCodePointEscape(out int code)
{
var ch = _source[_index];
var code = 0;
code = 0;

// At least one hex digit is required.
if (ch == '}')
Expand All @@ -605,26 +581,38 @@ private bool ScanHexEscape(char prefix, out char result)
break;
}

code = code * 16 + HexValue(ch);
try { code = checked(code * 16 + HexConverter.FromChar(ch)); }
catch (OverflowException) { return null; }
}

// Character.FromCodePoint (more precisely, the underlying char.ConvertFromUtf32 call) accepts
// ranges [U+0000..U+D7FF] and [U+E000..U+10FFFF] only.
// See also: https://github.com/dotnet/runtime/blob/v6.0.14/src/libraries/System.Private.CoreLib/src/System/Text/UnicodeUtility.cs#L169

if (code > 0x10FFFF || ch != '}')
{
return null;
}

// This range is valid in literals (e.g. "a\u{d800}\u{dc00}") but not valid in identifiers (e.g. a\u{d800}\u{dc00}).
// Let's return it in both cases and let Character.IsIdentifierStart/IsIdentifierPart deal with it.
if (code is >= 0xD800 and <= 0xDFFF)
{
return ParserExtensions.CharToString((char) code);
}

return Character.FromCodePoint(code);
}

private string ScanUnicodeCodePointEscape()
{
var result = TryToScanUnicodeCodePointEscape();
var result = TryScanUnicodeCodePointEscape(out _);
if (result is null)
{
ThrowUnexpectedToken();
}

return result!;
return result;
}

private string GetIdentifier()
Expand Down Expand Up @@ -671,7 +659,8 @@ private string GetComplexIdentifier()
_index += id.Length;

// '\u' (U+005C, U+0075) denotes an escaped character.
string ch;
string? ch;
int chcp;
if (cp == 0x5C)
{
if (_source.CharCodeAt(_index) != 0x75)
Expand All @@ -683,7 +672,14 @@ private string GetComplexIdentifier()
if (_source[_index] == '{')
{
++_index;
ch = ScanUnicodeCodePointEscape();
ch = TryScanUnicodeCodePointEscape(out chcp);
if (ch is null
|| (ch.Length == 1
? !Character.IsIdentifierStart(ch[0])
: !Character.IsIdentifierStart(chcp)))
{
ThrowUnexpectedToken();
}
}
else
{
Expand Down Expand Up @@ -728,7 +724,14 @@ private string GetComplexIdentifier()
if (_index < _source.Length && _source[_index] == '{')
{
++_index;
ch = ScanUnicodeCodePointEscape();
ch = TryScanUnicodeCodePointEscape(out chcp);
if (ch is null
|| (ch.Length == 1
? char.IsLowSurrogate(ch[0]) || !Character.IsIdentifierPart(ch[0])
: !Character.IsIdentifierPart(chcp)))
{
ThrowUnexpectedToken();
}
}
else
{
Expand Down Expand Up @@ -1545,7 +1548,7 @@ private Token ScanTemplate()
if (_source[_index] == '{')
{
++_index;
var unicodeCodePointEscape = TryToScanUnicodeCodePointEscape();
var unicodeCodePointEscape = TryScanUnicodeCodePointEscape(out _);
if (unicodeCodePointEscape is null)
{
notEscapeSequenceHead = 'u';
Expand Down
25 changes: 25 additions & 0 deletions test/Esprima.Tests/ScannerTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -83,4 +83,29 @@ public void CanResetScannerToCustomPosition()
Assert.Equal(new object[] { "foo", "=", 1.0, ";" }, tokens.Select(t => t.Value).ToArray());
Assert.Equal(new string[] { " c1 ", " c2" }, comments.Select(c => scanner.Code.AsSpan(c.Slice.Start, c.Slice.Length).ToString()).ToArray());
}

// An identifier that begins with \u{...} escapes in the surrogate range must fail to lex
// with the "unexpected token ILLEGAL" message.
[Fact]
public void ShouldRejectSurrogateRangeAsIdentifierStart()
{
    const string source = @"\u{d800}\u{dc00}";
    var scanner = new Scanner(source);

    // Typed as Func<object> so the same Assert.Throws overload is selected.
    Func<object> lex = () => scanner.Lex();
    var exception = Assert.Throws<ParserException>(lex);

    Assert.Equal(Messages.UnexpectedTokenIllegal, exception.Error?.Description);
}

// An identifier that continues with \u{...} escapes in the surrogate range must fail to lex
// with the "unexpected token ILLEGAL" message.
[Fact]
public void ShouldRejectSurrogateRangeAsIdentifierPart()
{
    const string source = @"a\u{d800}\u{dc00}";
    var scanner = new Scanner(source);

    // Typed as Func<object> so the same Assert.Throws overload is selected.
    Func<object> lex = () => scanner.Lex();
    var exception = Assert.Throws<ParserException>(lex);

    Assert.Equal(Messages.UnexpectedTokenIllegal, exception.Error?.Description);
}

// The same surrogate-range \u{...} escapes are legal inside a string literal: the scanner
// must produce a StringLiteral token whose value contains the two UTF-16 code units.
[Fact]
public void ShouldAcceptSurrogateRangeInLiterals()
{
    const string source = @"'a\u{d800}\u{dc00}'";

    var literal = new Scanner(source).Lex();

    Assert.Equal(TokenType.StringLiteral, literal.Type);
    Assert.Equal("a\ud800\udc00", literal.Value);
}
}