From 3a76a1b8cb00223154eebfde23724e99b5b8813a Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 13 Feb 2025 14:32:01 +1000 Subject: [PATCH] Make `Expr(:incomplete)` detection more robust to whitespace Rework incomplete expression detection so that trailing whitespace is always ignored, regardless of how the parser itself decides to attach it to other nodes of the tree. To do this, we walk back from the end of the parse stream and look for the byte offset of the last non-whitespace token. We then use that to determine whether the error node is "at the end of the parse". Improve testing by * Extracting the incomplete expressions which are part of the REPL stdlib tests and ensuring these match the incomplete tag generation of the flisp parser. Fix some divergences for `var""` syntax and invalid escape sequences in strings. * Ensuring that we test both `:statement` and `:all` level parsing - the REPL uses `:all` to allow parsing of multiple top level statements, so we need to test this. Also fix a minor bug where `enable_in_core!(false)` would result in the flisp parser being used, regardless of whether `VERSION` ships with JuliaSyntax enabled by default. Fixes #519. See also #518. --- src/hooks.jl | 21 ++- src/parse_stream.jl | 11 ++ test/hooks.jl | 321 +++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 339 insertions(+), 14 deletions(-) diff --git a/src/hooks.jl b/src/hooks.jl index a392b61d..914c0c1b 100644 --- a/src/hooks.jl +++ b/src/hooks.jl @@ -35,15 +35,17 @@ end function _incomplete_tag(n::SyntaxNode, codelen) i,c = _first_error(n) if isnothing(c) || last_byte(c) < codelen || codelen == 0 - return :none - elseif first_byte(c) <= codelen - if kind(c) == K"ErrorEofMultiComment" && last_byte(c) == codelen + if kind(c) == K"ErrorEofMultiComment" # This is the one weird case where the token itself is an # incomplete error return :comment else return :none end + elseif first_byte(c) <= codelen && kind(c) != K"ErrorInvalidEscapeSequence" + # "ErrorInvalidEscapeSequence" may be incomplete, so we don't include it + # here as a hard error. + return :none end if kind(c) == K"error" && numchildren(c) > 0 for cc in children(c) @@ -56,7 +58,7 @@ function _incomplete_tag(n::SyntaxNode, codelen) return :other end kp = kind(c.parent) - if kp == K"string" + if kp == K"string" || kp == K"var" return :string elseif kp == K"cmdstring" return :cmd @@ -170,7 +172,6 @@ function core_parser_hook(code, filename::String, lineno::Int, offset::Int, opti end end parse!(stream; rule=options) - pos_before_trivia = last_byte(stream) if options === :statement bump_trivia(stream; skip_newlines=false) if peek(stream) == K"NewlineWs" @@ -179,8 +180,9 @@ function core_parser_hook(code, filename::String, lineno::Int, offset::Int, opti end if any_error(stream) + pos_before_comments = last_non_whitespace_byte(stream) tree = build_tree(SyntaxNode, stream, first_line=lineno, filename=filename) - tag = _incomplete_tag(tree, pos_before_trivia) + tag = _incomplete_tag(tree, pos_before_comments) if _has_v1_10_hooks exc = ParseError(stream, filename=filename, first_line=lineno, incomplete_tag=tag) @@ -245,6 +247,7 @@ function core_parser_hook(code, filename::String, lineno::Int, offset::Int, opti # EXIT last_offset=$last_offset #-#-#- """) + flush(_debug_log[]) end # Rewrap result in an svec for use by the C code @@ -257,6 +260,7 @@ function core_parser_hook(code, filename::String, lineno::Int, offset::Int, opti # $exc #-#-#- """) + flush(_debug_log[]) end @error("""JuliaSyntax parser failed — falling back to flisp! This is not your fault. Please submit a bug report to https://github.com/JuliaLang/JuliaSyntax.jl/issues""", @@ -284,6 +288,8 @@ else Base.Meta.ParseError(e::JuliaSyntax.ParseError) = e end +_default_system_parser = _has_v1_6_hooks ? Core._parse : nothing + """ enable_in_core!([enable=true; freeze_world_age=true, debug_filename=nothing]) @@ -313,7 +319,8 @@ function enable_in_core!(enable=true; freeze_world_age = true, world_age = freeze_world_age ? Base.get_world_counter() : typemax(UInt) _set_core_parse_hook(fix_world_age(core_parser_hook, world_age)) else - _set_core_parse_hook(Core.Compiler.fl_parse) + @assert !isnothing(_default_system_parser) + _set_core_parse_hook(_default_system_parser) end nothing end diff --git a/src/parse_stream.jl b/src/parse_stream.jl index 33e029c6..0c57c2a4 100644 --- a/src/parse_stream.jl +++ b/src/parse_stream.jl @@ -1257,6 +1257,17 @@ first_byte(stream::ParseStream) = first(stream.tokens).next_byte # Use sentinel last_byte(stream::ParseStream) = _next_byte(stream)-1 any_error(stream::ParseStream) = any_error(stream.diagnostics) +# Return last non-whitespace byte which was parsed +function last_non_whitespace_byte(stream::ParseStream) + for i = length(stream.tokens):-1:1 + tok = stream.tokens[i] + if !(kind(tok) in KSet"Comment Whitespace NewlineWs ErrorEofMultiComment") + return tok.next_byte - 1 + end + end + return first_byte(stream) - 1 +end + function Base.empty!(stream::ParseStream) t = last(stream.tokens) empty!(stream.tokens) diff --git a/test/hooks.jl b/test/hooks.jl index 61772ce0..c41d2dac 100644 --- a/test/hooks.jl +++ b/test/hooks.jl @@ -117,11 +117,6 @@ end end @test Meta.parse(mystr) == :hi - JuliaSyntax.enable_in_core!(false) - end - - @testset "Expr(:incomplete)" begin - JuliaSyntax.enable_in_core!() err = Meta.parse("\"") @test Meta.isexpr(err, :incomplete) if JuliaSyntax._has_v1_10_hooks @@ -134,6 +129,10 @@ end @test err.args[1] isa String end + JuliaSyntax.enable_in_core!(false) + end + + @testset "Expr(:incomplete)" begin for (str, tag) in [ "\"" => :string "\"\$foo" => :string @@ -195,12 +194,320 @@ end "a b" => :none "()x" => :none "." => :none + + # Some error tokens which cannot be made complete by appending more characters + "1.e1." => :none + "\u200b" => :none + "x #=\xf5b\n=#" => :none + "₁" => :none + "0x1.0\n" => :none + "\"\$x෴\"" => :none + "10e1000" => :none + + # Multiline input with comments (#519) + "function f()\nbody #comment" => :block + "a = [\n1,\n2, #comment" => :other + + # Extended set of cases extracted from the REPL stdlib tests. + # There is some redundancy here, but we've mostly left these + # here because incomplete-detection is partly heuristic and + # it's good to have a wide variety of incomplete expressions. + # + # The "desired" incomplete tag here was generated from the + # flisp parser. + "Main.CompletionFoo." => :other + "Base.return_types(getin" => :other + "test7()." => :other + "(3,2)." => :other + "Base.print(\"lol" => :string + "run(`lol" => :cmd + "copy(A')." => :other + "cd(\"path_to_an_empty_folder_should_not_complete_latex\\\\\\alpha" => :string + "\"C:\\\\ \\alpha" => :string + "cd(\"C:\\U" => :string + "max(" => :other + "!(" => :other + "!isnothing(" => :other + "!!isnothing(" => :other + "CompletionFoo.test(1, 1, " => :other + "CompletionFoo.test(CompletionFoo.array," => :other + "CompletionFoo.test(1,1,1," => :other + "CompletionFoo.test1(Int," => :other + "CompletionFoo.test1(Float64," => :other + "prevind(\"θ\",1," => :other + "(1, CompletionFoo.test2(\")\"," => :other + "(1, CompletionFoo.test2(')'," => :other + "(1, CompletionFoo.test2(`')'`," => :other + "CompletionFoo.test3([1, 2] .+ CompletionFoo.varfloat," => :other + "CompletionFoo.test3([1.,2.], 1.," => :other + "CompletionFoo.test4(\"e\",r\" \"," => :other + "CompletionFoo.test5(broadcast((x,y)->x==y, push!(Base.split(\"\",' '),\"\",\"\"), \"\")," => :other + "CompletionFoo.test5(Bool[x==1 for x=1:4]," => :other + "CompletionFoo.test4(CompletionFoo.test_y_array[1]()[1], CompletionFoo.test_y_array[1]()[2], " => :other + "CompletionFoo.test4(\"\\\"\"," => :other + "convert(" => :other + "convert(" => :other + "CompletionFoo.test5(AbstractArray[Bool[]][1]," => :other + "CompletionFoo.test3(@time([1, 2] .+ CompletionFoo.varfloat)," => :other + "CompletionFoo.kwtest( " => :other + "CompletionFoo.kwtest(;" => :other + "CompletionFoo.kwtest(; x=1, " => :other + "CompletionFoo.kwtest(; kw=1, " => :other + "CompletionFoo.kwtest(x=1, " => :other + "CompletionFoo.kwtest(x=1; " => :other + "CompletionFoo.kwtest(x=kw=1, " => :other + "CompletionFoo.kwtest(; x=kw=1, " => :other + "CompletionFoo.kwtest2(1, x=1," => :other + "CompletionFoo.kwtest2(1; x=1, " => :other + "CompletionFoo.kwtest2(1, x=1; " => :other + "CompletionFoo.kwtest2(1, kw=1, " => :other + "CompletionFoo.kwtest2(1; kw=1, " => :other + "CompletionFoo.kwtest2(1, kw=1; " => :other + "CompletionFoo.kwtest2(y=3, 1, " => :other + "CompletionFoo.kwtest2(y=3, 1; " => :other + "CompletionFoo.kwtest2(kw=3, 1, " => :other + "CompletionFoo.kwtest2(kw=3, 1; " => :other + "CompletionFoo.kwtest2(1; " => :other + "CompletionFoo.kwtest2(1, " => :other + "CompletionFoo.kwtest4(x23=18, x; " => :other + "CompletionFoo.kwtest4(x23=18, x, " => :other + "CompletionFoo.kwtest4(x23=18, " => :other + "CompletionFoo.kwtest5(3, somekwarg=6," => :other + "CompletionFoo.kwtest5(3, somekwarg=6, anything, " => :other + "CompletionFoo.?([1,2,3], 2.0" => :other + "CompletionFoo.?('c'" => :other + "CompletionFoo.?(false, \"a\", 3, " => :other + "CompletionFoo.?(false, \"a\", 3, " => :other + "CompletionFoo.?(\"a\", 3, " => :other + "CompletionFoo.?(; " => :other + "CompletionFoo.?(" => :other + "CompletionFoo.test10(z, Integer[]...," => :other + "CompletionFoo.test10(3, Integer[]...," => :other + "CompletionFoo.test10(3, 4," => :other + "CompletionFoo.test10(3, 4, 5," => :other + "CompletionFoo.test10(z, z, 0, " => :other + "CompletionFoo.test10(\"a\", Union{Signed,Bool,String}[3][1], " => :other + "CompletionFoo.test11(Integer[false][1], Integer[14][1], " => :other + "CompletionFoo.test11(Integer[-7][1], Integer[0x6][1], 6," => :other + "CompletionFoo.test11(3, 4," => :other + "CompletionFoo.test11(0x8, 5," => :other + "CompletionFoo.test11(0x8, 'c'," => :other + "CompletionFoo.test11('d', 3," => :other + "CompletionFoo.test!12(" => :other + "CompletionFoo.kwtest(; x=2, y=4; kw=3, " => :other + "CompletionFoo.kwtest(x=2; y=4; " => :other + "CompletionFoo.kwtest((x=y)=4, " => :other + "CompletionFoo.kwtest(; (x=y)=4, " => :other + "CompletionFoo.kwtest(; w...=16, " => :other + "CompletionFoo.kwtest(; 2, " => :other + "CompletionFoo.kwtest(; 2=3, " => :other + "CompletionFoo.kwtest3(im; (true ? length : length), " => :other + "CompletionFoo.kwtest.(x=2; y=4; " => :other + "CompletionFoo.kwtest.(; w...=16, " => :other + "(1+2im)." => :other + "((1+2im))." => :other + "CompletionFoo.test_y_array[1]." => :other + "CompletionFoo.named." => :other + "#=\n\\alpha" => :comment + "#=\nmax" => :comment + "using " => :other + "(max" => :other + "@show \"/dev/nul" => :string + "@show \"/tm" => :string + "@show \"/dev/nul" => :string + "(Iter" => :other + "\"/tmp/jl_4sjOtz/tmpfoob" => :string + "\"~" => :string + "\"~user" => :string + "\"/tmp/jl_Mn9Rbz/selfsym" => :string + "\"~/ka8w5rsz" => :string + "\"foo~bar" => :string + "\"~/Zx6Wa0GkC" => :string + "\"~/Zx6Wa0GkC0" => :string + "\"~/Zx6Wa0GkC0/my_" => :string + "\"~/Zx6Wa0GkC0/my_file" => :string + "cd(\"folder_do_not_exist_77/file" => :string + "CompletionFoo.tuple." => :other + "CompletionFoo.test_dict[\"ab" => :string + "CompletionFoo.test_dict[\"abcd" => :string + "CompletionFoo.test_dict[ \"abcd" => :string + "CompletionFoo.test_dict[\"abcd" => :string + "CompletionFoo.test_dict[:b" => :other + "CompletionFoo.test_dict[:bar2" => :other + "CompletionFoo.test_dict[Ba" => :other + "CompletionFoo.test_dict[occ" => :other + "CompletionFoo.test_dict[`l" => :cmd + "CompletionFoo.test_dict[6" => :other + "CompletionFoo.test_dict[66" => :other + "CompletionFoo.test_dict[(" => :other + "CompletionFoo.test_dict[\"\\alp" => :string + "CompletionFoo.test_dict[\"\\alpha" => :string + "CompletionFoo.test_dict[\"α" => :string + "CompletionFoo.test_dict[:α" => :other + "CompletionFoo.test_dict[" => :other + "CompletionFoo.test_customdict[\"ab" => :string + "CompletionFoo.test_customdict[\"abcd" => :string + "CompletionFoo.test_customdict[ \"abcd" => :string + "CompletionFoo.test_customdict[\"abcd" => :string + "CompletionFoo.test_customdict[:b" => :other + "CompletionFoo.test_customdict[:bar2" => :other + "CompletionFoo.test_customdict[Ba" => :other + "CompletionFoo.test_customdict[occ" => :other + "CompletionFoo.test_customdict[`l" => :cmd + "CompletionFoo.test_customdict[6" => :other + "CompletionFoo.test_customdict[66" => :other + "CompletionFoo.test_customdict[(" => :other + "CompletionFoo.test_customdict[\"\\alp" => :string + "CompletionFoo.test_customdict[\"\\alpha" => :string + "CompletionFoo.test_customdict[\"α" => :string + "CompletionFoo.test_customdict[:α" => :other + "CompletionFoo.test_customdict[" => :other + "test_repl_comp_dict[\"ab" => :string + "test_repl_comp_dict[\"abcd" => :string + "test_repl_comp_dict[ \"abcd" => :string + "test_repl_comp_dict[\"abcd" => :string + "test_repl_comp_dict[:b" => :other + "test_repl_comp_dict[:bar2" => :other + "test_repl_comp_dict[Ba" => :other + "test_repl_comp_dict[occ" => :other + "test_repl_comp_dict[`l" => :cmd + "test_repl_comp_dict[6" => :other + "test_repl_comp_dict[66" => :other + "test_repl_comp_dict[(" => :other + "test_repl_comp_dict[\"\\alp" => :string + "test_repl_comp_dict[\"\\alpha" => :string + "test_repl_comp_dict[\"α" => :string + "test_repl_comp_dict[:α" => :other + "test_repl_comp_dict[" => :other + "test_repl_comp_customdict[\"ab" => :string + "test_repl_comp_customdict[\"abcd" => :string + "test_repl_comp_customdict[ \"abcd" => :string + "test_repl_comp_customdict[\"abcd" => :string + "test_repl_comp_customdict[:b" => :other + "test_repl_comp_customdict[:bar2" => :other + "test_repl_comp_customdict[Ba" => :other + "test_repl_comp_customdict[occ" => :other + "test_repl_comp_customdict[`l" => :cmd + "test_repl_comp_customdict[6" => :other + "test_repl_comp_customdict[66" => :other + "test_repl_comp_customdict[(" => :other + "test_repl_comp_customdict[\"\\alp" => :string + "test_repl_comp_customdict[\"\\alpha" => :string + "test_repl_comp_customdict[\"α" => :string + "test_repl_comp_customdict[:α" => :other + "test_repl_comp_customdict[" => :other + "CompletionFoo.kwtest3(a;foob" => :other + "CompletionFoo.kwtest3(a; le" => :other + "CompletionFoo.kwtest3.(a;\nlength" => :other + "CompletionFoo.kwtest3(a, length=4, l" => :other + "CompletionFoo.kwtest3(a; kwargs..., fo" => :other + "CompletionFoo.kwtest3(a; another!kwarg=0, le" => :other + "CompletionFoo.kwtest3(a; another!" => :other + "CompletionFoo.kwtest3(a; another!kwarg=0, foob" => :other + "CompletionFoo.kwtest3(a; namedarg=0, foob" => :other + "kwtest3(blabla; unknown=4, namedar" => :other + "kwtest3(blabla; named" => :other + "kwtest3(blabla; named." => :other + "kwtest3(blabla; named..., another!" => :other + "kwtest3(blabla; named..., len" => :other + "kwtest3(1+3im; named" => :other + "kwtest3(1+3im; named." => :other + "CompletionFoo.kwtest4(a; x23=0, _" => :other + "CompletionFoo.kwtest4(a; xαβγ=1, _" => :other + "CompletionFoo.kwtest4.(a; xαβγ=1, _" => :other + "CompletionFoo.kwtest4(a; x23=0, x" => :other + "CompletionFoo.kwtest4.(a; x23=0, x" => :other + "CompletionFoo.kwtest4(a; _a1b=1, x" => :other + "CompletionFoo.kwtest5(3, 5; somek" => :other + "CompletionFoo.kwtest5(3, 5, somekwarg=4, somek" => :other + "CompletionFoo.kwtest5(3, 5, 7; somekw" => :other + "CompletionFoo.kwtest5(3, 5, 7, 9; somekw" => :other + "CompletionFoo.kwtest5(3, 5, 7, 9, Any[]...; somek" => :other + "CompletionFoo.kwtest5(unknownsplat...; somekw" => :other + "CompletionFoo.kwtest5(3, 5, 7, 9, somekwarg=4, somek" => :other + "CompletionFoo.kwtest5(String[]..., unknownsplat...; xy" => :other + "CompletionFoo.kwtest5('a', unknownsplat...; xy" => :other + "CompletionFoo.kwtest5('a', 3, String[]...; xy" => :other + "CompletionFoo.kwtest3(" => :other + "CompletionFoo.kwtest3(a;" => :other + "CompletionFoo.kwtest3(a; len2=" => :other + "CompletionFoo.kwtest3(a; len2=le" => :other + "CompletionFoo.kwtest3(a; len2=3 " => :other + "CompletionFoo.kwtest3(a; [le" => :other + "CompletionFoo.kwtest3([length; le" => :other + "CompletionFoo.kwtest3(a; (le" => :other + "CompletionFoo.kwtest3(a; foo(le" => :other + "CompletionFoo.kwtest3(a; (; le" => :other + "CompletionFoo.kwtest3(a; length, " => :other + "CompletionFoo.kwtest3(a; kwargs..., " => :other + ":(function foo(::Int) end).args[1].args[2]." => :other + "log(log.(varfloat)," => :other + "Base.return_types(getin" => :other + "test(1,1, " => :other + "test.(1,1, " => :other + "prevind(\"θ\",1," => :other + "typeof(+)." => :other + "test_dict[\"ab" => :string + "CompletionFoo.x." => :other + "@noexist." => :other + "Main.@noexist." => :none # <- Invalid syntax which adding a suffix can't fix + "@Main.noexist." => :other + "@show." => :other + "@macroexpand." => :other + "CompletionFoo.@foobar()." => :other + "CompletionFoo.@foobar(4)." => :other + "foo(#=#==#=##==#).rs[1]." => :other + "foo().r." => :other + "foo(#=#=# =#= =#).r." => :other + "test_47594." => :other + "Issue36437(42)." => :other + "Some(Issue36437(42)).value." => :other + "some_issue36437.value." => :other + "some_issue36437.value.a, some_issue36437.value." => :other + "@show some_issue36437.value.a; some_issue36437.value." => :other + "()." => :other + "Ref(Issue36437(42))[]." => :other + "global_dict[:r]." => :other + "global_dict_nested[:g][:r]." => :other + "global_dict_nested[" => :other + "global_dict_nested[:g][" => :other + "pop!(global_xs)." => :other + "tcd1." => :other + "tcd1.x." => :other + "tcd1.x.v." => :other + "getkeyelem(mutable_const_prop)." => :other + "getkeyelem(mutable_const_prop).value." => :other + "var\"complicated " => :string + "WeirdNames().var\"oh " => :string + "WeirdNames().var\"" => :string + "\"abc\"." => :other + "(rand(Bool) ? issue51499_2_1 : issue51499_2_2)." => :other + "union_somes(1, 1.0)." => :other + "union_some_ref(1, 1.0)." => :other + "Issue49892(fal" => :other + "-CompletionFoo.Test_y(3)." => :other + "99 ⨷⁻ᵨ⁷ CompletionFoo.type_test." => :other + "CompletionFoo.type_test + CompletionFoo.Test_y(2)." => :other + "(CompletionFoo.type_test + CompletionFoo.Test_y(2))." => :other + "CompletionFoo.type_test + CompletionFoo.unicode_αβγ." => :other + "(CompletionFoo.type_test + CompletionFoo.unicode_αβγ)." => :other + "using Base." => :other + "@time(using .Iss" => :other + "using .Issue52922.Inner1." => :other + "Issue53126()." => :other + "using " => :other + "global xxx::Number = Base." => :other ] @testset "$(repr(str))" begin - @test Base.incomplete_tag(Meta.parse(str, raise=false)) == tag + # Test :statement parsing + ex = JuliaSyntax.core_parser_hook(str, "somefile", 1, 0, :statement)[1] + @test Base.incomplete_tag(ex) == tag + # Test :all parsing - this is what the REPL uses to parse user input. + ex = JuliaSyntax.core_parser_hook(str, "somefile", 1, 0, :all)[1] + @test ex.head == :toplevel + @test Base.incomplete_tag(ex.args[end]) == tag end end - JuliaSyntax.enable_in_core!(false) # Should not throw @test JuliaSyntax.core_parser_hook("+=", "somefile", 1, 0, :statement)[1] isa Expr