diff --git a/docs/src/api.md b/docs/src/api.md index eae6e5f1..da35c941 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -30,14 +30,36 @@ JuliaSyntax.untokenize JuliaSyntax.Token ``` -## Source file handling +## Source code handling + +This section describes the generic functions for source text, source location +computation and formatting functions. + +Contiguous syntax objects like nodes in the syntax tree should implement the +following where possible: ```@docs -JuliaSyntax.SourceFile -JuliaSyntax.highlight -JuliaSyntax.sourcetext +JuliaSyntax.sourcefile +JuliaSyntax.byte_range +``` + +This will provide implementations of the following which include range +information, line numbers, and fancy highlighting of source ranges: + +```@docs +JuliaSyntax.first_byte +JuliaSyntax.last_byte +JuliaSyntax.filename JuliaSyntax.source_line JuliaSyntax.source_location +JuliaSyntax.sourcetext +JuliaSyntax.highlight +``` + +`SourceFile`-specific functions: + +```@docs +JuliaSyntax.SourceFile JuliaSyntax.source_line_range ``` @@ -64,8 +86,5 @@ JuliaSyntax.GreenNode ``` Functions applicable to syntax trees include everything in the sections on -heads/kinds, and source file handling. - -```@docs -JuliaSyntax.byte_range -``` +heads/kinds as well as the accessor functions in the source code handling +section. 
diff --git a/src/diagnostics.jl b/src/diagnostics.jl index 76e8d6a7..39fa473f 100644 --- a/src/diagnostics.jl +++ b/src/diagnostics.jl @@ -37,8 +37,7 @@ function Diagnostic(first_byte, last_byte; error=nothing, warning=nothing) Diagnostic(first_byte, last_byte, level, message) end -first_byte(d::Diagnostic) = d.first_byte -last_byte(d::Diagnostic) = d.last_byte +byte_range(d::Diagnostic) = d.first_byte:d.last_byte is_error(d::Diagnostic) = d.level === :error # Make relative path into a file URL @@ -72,12 +71,12 @@ function show_diagnostic(io::IO, diagnostic::Diagnostic, source::SourceFile) (:normal, "Info") line, col = source_location(source, first_byte(diagnostic)) linecol = "$line:$col" - filename = source.filename + fname = filename(source) file_href = nothing - if !isnothing(filename) - locstr = "$filename:$linecol" - if !startswith(filename, "REPL[") && get(io, :color, false) - url = _file_url(filename) + if !isempty(fname) + locstr = "$fname:$linecol" + if !startswith(fname, "REPL[") && get(io, :color, false) + url = _file_url(fname) if !isnothing(url) file_href = url*"#$linecol" end diff --git a/src/expr.jl b/src/expr.jl index a0cbf91f..d600a99b 100644 --- a/src/expr.jl +++ b/src/expr.jl @@ -68,6 +68,11 @@ function _strip_parens(ex) end end +# Get Julia value of leaf node as it would be represented in `Expr` form +function _expr_leaf_val(node::SyntaxNode) + node.val +end + function _leaf_to_Expr(source, txtbuf, head, srcrange, node) k = kind(head) if k == K"core_@cmd" @@ -79,7 +84,7 @@ function _leaf_to_Expr(source, txtbuf, head, srcrange, node) Expr(:error) : Expr(:error, "$(_token_error_descriptions[k]): `$(source[srcrange])`") else - val = isnothing(node) ? parse_julia_literal(txtbuf, head, srcrange) : node.val + val = isnothing(node) ? 
parse_julia_literal(txtbuf, head, srcrange) : _expr_leaf_val(node) if val isa Union{Int128,UInt128,BigInt} # Ignore the values of large integers and convert them back to # symbolic/textural form for compatibility with the Expr @@ -519,14 +524,7 @@ function build_tree(::Type{Expr}, stream::ParseStream; only(_fixup_Expr_children!(SyntaxHead(K"None",EMPTY_FLAGS), loc, Any[entry.ex])) end -""" -Get the source file for a given syntax object -""" -function sourcefile(node::SyntaxNode) - node.source -end - -function _to_expr(node::SyntaxNode) +function _to_expr(node) file = sourcefile(node) if !haschildren(node) offset, txtbuf = _unsafe_wrap_substring(sourcetext(file)) @@ -537,9 +535,13 @@ function _to_expr(node::SyntaxNode) _internal_node_to_Expr(file, byte_range(node), head(node), byte_range.(cs), head.(cs), args) end -function Base.Expr(node::SyntaxNode) +function to_expr(node) ex = _to_expr(node) - loc = source_location(LineNumberNode, sourcefile(node), first_byte(node)) + loc = source_location(LineNumberNode, node) only(_fixup_Expr_children!(SyntaxHead(K"None",EMPTY_FLAGS), loc, Any[ex])) end +function Base.Expr(node::SyntaxNode) + to_expr(node) +end + diff --git a/src/parse_stream.jl b/src/parse_stream.jl index 0c80ef9d..dc2192f9 100644 --- a/src/parse_stream.jl +++ b/src/parse_stream.jl @@ -514,8 +514,7 @@ struct FullToken end head(t::FullToken) = t.head -first_byte(t::FullToken) = t.first_byte -last_byte(t::FullToken) = t.last_byte +byte_range(t::FullToken) = t.first_byte:t.last_byte span(t::FullToken) = 1 + last_byte(t) - first_byte(t) function peek_full_token(stream::ParseStream, n::Integer=1; diff --git a/src/parser_api.jl b/src/parser_api.jl index 51548a99..fb805aa7 100644 --- a/src/parser_api.jl +++ b/src/parser_api.jl @@ -26,6 +26,8 @@ function Base.showerror(io::IO, err::ParseError) show_diagnostics(io, err.diagnostics[1:i], err.source) end +sourcefile(err::ParseError) = err.source + """ parse!(stream::ParseStream; rule=:all) diff --git 
a/src/source_files.jl b/src/source_files.jl index a5b14f09..57d89917 100644 --- a/src/source_files.jl +++ b/src/source_files.jl @@ -1,3 +1,110 @@ +#------------------------------------------------------------------------------- +# Generic functions for source text, source location computation and formatting +# functions + +""" + sourcefile(x) + +Get the source file object (usually `SourceFile`) for a given syntax object +`x`. The source file along with a byte range may be used to compute +`source_line()`, `source_location()`, `filename()`, etc. +""" +function sourcefile +end + +""" + byte_range(x) + +Return the range of bytes which `x` covers in the source text. +""" +function byte_range +end + +""" + first_byte(x) + +Return the first byte of `x` in the source text. +""" +first_byte(x) = first(byte_range(x)) + +""" + last_byte(x) + +Return the last byte of `x` in the source text. +""" +last_byte(x) = last(byte_range(x)) + +""" + filename(x) + +Get file name associated with `source`, or an empty string if one didn't exist. + +For objects `x` such as syntax trees, defers to `filename(sourcefile(x))` by +default. +""" +function filename(x) + source = sourcefile(x) + isnothing(source) ? "" : filename(source) +end + +""" + source_line(x) + source_line(source::SourceFile, byte_index::Integer) + +Get the line number of the first line on which object `x` appears. In the +second form, get the line number at the given `byte_index` within `source`. +""" +source_line(x) = source_line(sourcefile(x), first_byte(x)) + +""" + source_location(x) + source_location(source::SourceFile, byte_index::Integer) + + source_location(LineNumberNode, x) + source_location(LineNumberNode, source, byte_index) + +Get `(line,column)` of the first byte where object `x` appears in the source. +The second form allows one to be more precise with the `byte_index`, given the +source file. + +Providing `LineNumberNode` as the first argument will return the line and file +name in a line number node object. 
+""" +source_location(x) = source_location(sourcefile(x), first_byte(x)) + +""" + sourcetext(x) + +Get the full source text of syntax object `x` +""" +function sourcetext(x) + view(sourcefile(x), byte_range(x)) +end + +""" + highlight(io, x; color, note, notecolor, + context_lines_before, context_lines_inner, context_lines_after) + + highlight(io::IO, source::SourceFile, range::UnitRange; kws...) + +Print the lines of source code surrounding `x` which is highlighted with +background `color` and underlined with markers in the text. A `note` in +`notecolor` may be provided as annotation. By default, `x` should be an object +with `sourcefile(x)` and `byte_range(x)` implemented. + +The context arguments `context_lines_before`, etc, refer to the number of +lines of code which will be printed as context before and after, with `inner` +referring to context lines inside a multiline region. + +The second form shares the keywords of the first but allows an explicit source +file and byte range to be supplied. +""" +function highlight(io::IO, x; kws...) + highlight(io, sourcefile(x), byte_range(x); kws...) +end + + +#------------------------------------------------------------------------------- """ SourceFile(code [; filename=nothing, first_line=1, first_index=1]) @@ -53,16 +160,19 @@ function _source_line_index(source::SourceFile, byte_index) end _source_line(source::SourceFile, lineidx) = lineidx + source.first_line - 1 -""" -Get the line number at the given byte index. -""" -source_line(source::SourceFile, byte_index) = +function source_location(::Type{LineNumberNode}, x) + source_location(LineNumberNode, sourcefile(x), first_byte(x)) +end + +source_line(source::SourceFile, byte_index::Integer) = _source_line(source, _source_line_index(source, byte_index)) -""" -Get line number and character within the line at the given byte index. -""" -function source_location(source::SourceFile, byte_index) +function filename(source::SourceFile) + f = source.filename + !isnothing(f) ? 
f : "" +end + +function source_location(source::SourceFile, byte_index::Integer) lineidx = _source_line_index(source, byte_index) i = source.line_starts[lineidx] column = 1 @@ -77,7 +187,7 @@ end Get byte range of the source line at byte_index, buffered by `context_lines_before` and `context_lines_after` before and after. """ -function source_line_range(source::SourceFile, byte_index; +function source_line_range(source::SourceFile, byte_index::Integer; context_lines_before=0, context_lines_after=0) lineidx = _source_line_index(source, byte_index) fbyte = source.line_starts[max(lineidx-context_lines_before, 1)] @@ -86,14 +196,14 @@ function source_line_range(source::SourceFile, byte_index; lbyte + source.byte_offset) end -function source_location(::Type{LineNumberNode}, source::SourceFile, byte_index) - LineNumberNode(source_line(source, byte_index), - isnothing(source.filename) ? nothing : Symbol(source.filename)) +function source_location(::Type{LineNumberNode}, source::SourceFile, byte_index::Integer) + fn = filename(source) + LineNumberNode(source_line(source, byte_index), isempty(fn) ? nothing : Symbol(fn)) end function Base.show(io::IO, ::MIME"text/plain", source::SourceFile) - fn = isnothing(source.filename) ? "" : " $(source.filename)" - header = "## SourceFile$fn ##" + fn = filename(source) + header = "## SourceFile$(isempty(fn) ? "" : " ")$fn ##" print(io, header, "\n") heightlim = displaysize(io)[1] ÷ 2 if !get(io, :limit, false) || length(source.line_starts) <= heightlim @@ -193,27 +303,6 @@ function _print_marker_line(io, prefix_str, str, underline, singleline, color, end end -function highlight(io::IO, x; kws...) - highlight(io, sourcefile(x), byte_range(x); kws...) -end - -""" - highlight(io::IO, source::SourceFile, range::UnitRange; - color, note, notecolor, - context_lines_before, context_lines_inner, context_lines_after, - highlight(io, x; kws...) 
- -Print the lines of source code `source` surrounding the given byte `range` -which is highlighted with background `color` and underlined with markers in the -text. A `note` in `notecolor` may be provided as annotation. - -In the second form, `x` is an object with `sourcefile(x)` and `byte_range(x)` -implemented. - -The context arguments `context_lines_before`, etc, refer to the number of -lines of code which will be printed as context before and after, with `inner` -referring to context lines inside a multiline region. -""" function highlight(io::IO, source::SourceFile, range::UnitRange; color=(120,70,70), context_lines_before=2, context_lines_inner=1, context_lines_after=2, diff --git a/src/syntax_tree.jl b/src/syntax_tree.jl index d885af80..dc9a7375 100644 --- a/src/syntax_tree.jl +++ b/src/syntax_tree.jl @@ -121,31 +121,13 @@ head(node::AbstractSyntaxNode) = head(node.raw) span(node::AbstractSyntaxNode) = span(node.raw) -first_byte(node::AbstractSyntaxNode) = node.position -last_byte(node::AbstractSyntaxNode) = node.position + span(node) - 1 +byte_range(node::AbstractSyntaxNode) = node.position:(node.position + span(node) - 1) -""" - byte_range(ex) - -Return the range of bytes which `ex` covers in the source text. -""" -byte_range(ex) = first_byte(ex):last_byte(ex) - -""" - sourcetext(node) - -Get the full source text of a node. 
-""" -function sourcetext(node::AbstractSyntaxNode) - view(sourcefile(node), byte_range(node)) -end - -source_line(node::AbstractSyntaxNode) = source_line(sourcefile(node), node.position) -source_location(node::AbstractSyntaxNode) = source_location(sourcefile(node), node.position) +sourcefile(node::AbstractSyntaxNode) = node.source function _show_syntax_node(io, current_filename, node::AbstractSyntaxNode, indent, show_byte_offsets) - fname = sourcefile(node).filename + fname = filename(node) line, col = source_location(node) posstr = "$(lpad(line, 4)):$(rpad(col,3))│" if show_byte_offsets @@ -192,7 +174,7 @@ end function Base.show(io::IO, ::MIME"text/plain", node::AbstractSyntaxNode; show_byte_offsets=false) println(io, "line:col│$(show_byte_offsets ? " byte_range │" : "") tree │ file_name") - _show_syntax_node(io, Ref{Union{Nothing,String}}(nothing), node, "", show_byte_offsets) + _show_syntax_node(io, Ref(""), node, "", show_byte_offsets) end function Base.show(io::IO, ::MIME"text/x.sexpression", node::AbstractSyntaxNode) diff --git a/test/hooks.jl b/test/hooks.jl index d5944a04..35939119 100644 --- a/test/hooks.jl +++ b/test/hooks.jl @@ -35,13 +35,13 @@ end JuliaSyntax.core_parser_hook("[x)", "f1", 1, 0, :statement) ) @test err isa JuliaSyntax.ParseError - @test err.source.filename == "f1" + @test filename(err) == "f1" @test err.source.first_line == 1 err = _unwrap_parse_error( JuliaSyntax.core_parser_hook("[x)", "f2", 2, 0, :statement) ) @test err isa JuliaSyntax.ParseError - @test err.source.filename == "f2" + @test filename(err) == "f2" @test err.source.first_line == 2 # Errors including nontrivial offset indices diff --git a/test/test_utils.jl b/test/test_utils.jl index 69915af2..b16aef31 100644 --- a/test/test_utils.jl +++ b/test/test_utils.jl @@ -35,7 +35,8 @@ using .JuliaSyntax: fl_parse, highlight, tokenize, - untokenize + untokenize, + filename if VERSION < v"1.6" # Compat stuff which might not be in Base for older versions