Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/kinds.jl
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ register_kinds!(JuliaSyntax, 0, [
"ErrorOverLongCharacter"
"ErrorInvalidUTF8"
"ErrorInvisibleChar"
"ErrorIdentifierStart"
"ErrorUnknownCharacter"
"ErrorBidiFormatting"
# Generic error
Expand Down Expand Up @@ -1175,6 +1176,7 @@ const _token_error_descriptions = Dict{Kind, String}(
K"ErrorOverLongCharacter"=>"character literal contains multiple characters",
K"ErrorInvalidUTF8"=>"invalid UTF-8 sequence",
K"ErrorInvisibleChar"=>"invisible character",
K"ErrorIdentifierStart" => "identifier cannot begin with character",
K"ErrorUnknownCharacter"=>"unknown unicode character",
K"ErrorBidiFormatting"=>"unbalanced bidirectional unicode formatting",
K"ErrorInvalidOperator" => "invalid operator",
Expand Down
2 changes: 1 addition & 1 deletion src/parse_stream.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1051,7 +1051,7 @@ function validate_tokens(stream::ParseStream)
elseif is_error(k) && k != K"error"
# Emit messages for non-generic token errors
tokstr = String(txtbuf[tokrange])
- msg = if k in KSet"ErrorInvisibleChar ErrorUnknownCharacter"
+ msg = if k in KSet"ErrorInvisibleChar ErrorUnknownCharacter ErrorIdentifierStart"
"$(_token_error_descriptions[k]) $(repr(tokstr[1]))"
elseif k in KSet"ErrorInvalidUTF8 ErrorBidiFormatting"
"$(_token_error_descriptions[k]) $(repr(tokstr))"
Expand Down
5 changes: 3 additions & 2 deletions src/tokenize.jl
Original file line number Diff line number Diff line change
Expand Up @@ -538,8 +538,9 @@ function _next_token(l::Lexer, c)
return emit(l, k)
else
emit(l,
- !isvalid(c) ? K"ErrorInvalidUTF8" :
- is_invisible_char(c) ? K"ErrorInvisibleChar" :
+ !isvalid(c) ? K"ErrorInvalidUTF8" :
+ is_invisible_char(c) ? K"ErrorInvisibleChar" :
+ is_identifier_char(c) ? K"ErrorIdentifierStart" :
K"ErrorUnknownCharacter")
end
end
Expand Down
1 change: 1 addition & 0 deletions test/diagnostics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ end
@test diagnostic("a$(c)b") ==
Diagnostic(2, 1+sizeof(string(c)), :error, "invisible character $(repr(c))")
end
@test diagnostic("₁") == Diagnostic(1, 3, :error, "identifier cannot begin with character '₁'")
@test diagnostic(":⥻") == Diagnostic(2, 4, :error, "unknown unicode character '⥻'")

@test diagnostic("\"X \u202a X\"") == Diagnostic(2, 8, :error, "unbalanced bidirectional unicode formatting \"X \\u202a X\"")
Expand Down
1 change: 1 addition & 0 deletions test/tokenize.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1009,6 +1009,7 @@ end

@testset "invalid UTF-8 characters" begin
@test onlytok("\x00") == K"ErrorUnknownCharacter"
@test onlytok("₁") == K"ErrorIdentifierStart"

bad_chars = [
first("\xe2") # malformed
Expand Down