diff --git a/README.md b/README.md index afbe683c2..786be8945 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ julia> using TypeProfiler julia> profile_and_watch_file("demo.jl") profiling demo.jl ... (finished in 2.745 sec) -═════ 6 possible errors found in demo.jl ═════ +═════ 6 possible errors found ═════ ┌ @ demo.jl:11 Main.fib(Main.m) │ variable Main.m is not defined: Main.fib(Main.m) └ @@ -96,7 +96,7 @@ This technique is often called "abstract interpretation" and TP internally uses Lastly let's apply the following diff to demo.jl so that it works nicely: -> git diff --no-index demo.jl demo-fixed.jl +> git apply fix-demo.diff ```diff diff --git a/demo.jl b/demo-fixed.jl @@ -136,7 +136,7 @@ index d2b188a..1d1b3da 100644 -@inline bar(v::Ty{T}) where {T<:Number} = bar(v.fdl) # typo "fdl" +@inline bar(v::Ty{T}) where {T<:Number} = bar(v.fld) # typo fixed @inline bar(v::Ty) = bar(convert(Number, v.fld)) - + foo(1.2) -foo("1") # `String` can't be converted to `Number` +foo('1') # `Char` will be converted to `UInt32` @@ -145,16 +145,14 @@ index d2b188a..1d1b3da 100644 If you save the file, TP will automatically trigger profiling, and this time, won't complain anything: ```julia -profiling demo.jl ... (finished in 1.913 sec) +profiling demo.jl ... (finished in 5.146 sec) No errors ! ``` ### TODOs -- profiling on a package (without actual loading) - * support `module` - * special case `include` calls +- support `module` expressions, profiling on a package (without actual loading) - more reports * more correct error reports in general * report some cases of `throw`, e.g. `rand('1')::ArgumentError("Sampler for this object is not defined")` diff --git a/demo-fixed.jl b/demo-fixed.jl deleted file mode 100644 index 1d1b3da18..000000000 --- a/demo-fixed.jl +++ /dev/null @@ -1,44 +0,0 @@ -######## -# demo # -######## - -# fibonacci -# --------- - -# cache, cache, cache -function fib(n::T) where {T<:Number} - cache = Dict(zero(T)=>zero(T), one(T)=>one(T)) - return _fib(n, cache) -end -_fib(n, cache) = if haskey(cache, n) - cache[n] -else - cache[n] = _fib(n-1, cache) + _fib(n-2, cache) -end - -fib(BigInt(1000)) # will terminate in ordinal execution as well -m = 1000 # define m -fib(m) -fib(parse(Int, "1000")) - - -# language features -# ----------------- - -# user-defined types -struct Ty{T} - fld::T -end - -function foo(a) - v = Ty(a) - return bar(v) -end - -# macros will be expanded -@inline bar(n::T) where {T<:Number} = n < 0 ? zero(T) : one(T) -@inline bar(v::Ty{T}) where {T<:Number} = bar(v.fld) # typo fixed -@inline bar(v::Ty) = bar(convert(Number, v.fld)) - -foo(1.2) -foo('1') # `Char` will be converted to `UInt32` diff --git a/fix-demo.diff b/fix-demo.diff new file mode 100644 index 000000000..8c3c324bd --- /dev/null +++ b/fix-demo.diff @@ -0,0 +1,41 @@ +diff --git a/demo.jl b/demo-fixed.jl +index d2b188a..1d1b3da 100644 +--- a/demo.jl ++++ b/demo.jl +@@ -5,11 +5,21 @@ + # fibonacci + # --------- + +-fib(n) = n ≤ 2 ? n : fib(n-1) + fib(n-2) ++# cache, cache, cache ++function fib(n::T) where {T<:Number} ++ cache = Dict(zero(T)=>zero(T), one(T)=>one(T)) ++ return _fib(n, cache) ++end ++_fib(n, cache) = if haskey(cache, n) ++ cache[n] ++else ++ cache[n] = _fib(n-1, cache) + _fib(n-2, cache) ++end + +-fib(1000) # never terminates in ordinal execution +-fib(m) # undef var +-fib("1000") # obvious type error ++fib(BigInt(1000)) # will terminate in ordinal execution as well ++m = 1000 # define m ++fib(m) ++fib(parse(Int, "1000")) + + + # language features +@@ -27,8 +37,8 @@ end + + # macros will be expanded + @inline bar(n::T) where {T<:Number} = n < 0 ? zero(T) : one(T) +-@inline bar(v::Ty{T}) where {T<:Number} = bar(v.fdl) # typo "fdl" ++@inline bar(v::Ty{T}) where {T<:Number} = bar(v.fld) # typo fixed + @inline bar(v::Ty) = bar(convert(Number, v.fld)) + + foo(1.2) +-foo("1") # `String` can't be converted to `Number` ++foo('1') # `Char` will be converted to `UInt32` diff --git a/src/TypeProfiler.jl b/src/TypeProfiler.jl index 7d68eea44..c7dbd4d83 100644 --- a/src/TypeProfiler.jl +++ b/src/TypeProfiler.jl @@ -34,6 +34,9 @@ import Base: import Base.Meta: isexpr, _parse_string +import Base.Iterators: + flatten + using FileWatching, Requires # includes diff --git a/src/abstractinterpretation.jl b/src/abstractinterpretation.jl index 719193501..44b5361f6 100644 --- a/src/abstractinterpretation.jl +++ b/src/abstractinterpretation.jl @@ -68,7 +68,12 @@ function abstract_eval_special_value(interp::TPInterpreter, @nospecialize(e), vt # add_remark!(interp, sv, UndefVarErrorReport(sv, sv.mod, s)) # end elseif isa(e, GlobalRef) - check_global_ref!(interp, sv, e.mod, e.name) && (ret = Bottom) # ret here should annotated as `Any` by `NativeInterpreter`, but here I would like to be more conservative and change it to `Bottom` + vgv = getvirtualglobalvar(interp, e.mod, e.name) + if isnothing(vgv) + check_global_ref!(interp, sv, e.mod, e.name) && (ret = Bottom) # ret here should annotated as `Any` by `NativeInterpreter`, but here I would like to be more conservative and change it to `Bottom` + else + ret = vgv + end end return ret @@ -97,17 +102,33 @@ end # return ret # end -# FIXME: -# this is such an super fragile and problematic copy-and-paste; -# the whole point is to not let abstract interpretation to halt even after there is an -# obvious error point found (, which is usually represented by `Bottom`-annotated type and -# triggers early-loop-escape in the native implementation), and allow TP to collect as much -# errors as possible; so the only diffs are: -# - the first argument changed to `TPInterpreter` -# - comment out 3 early-loop-escapes -# -# ... and actually this introduces one obvious bug that `typeinf_local(::TPInterpreter, ::InferenceState)` -# sometimes runs the same inference routine twice, which leads to duplicated error reports +""" + typeinf_local(::TPInterpreter, ::InferenceState) + +works as `typeinf_local(::AbstractInterpreter, ::InferenceState)` (i.e. "make as much + progress on `frame` as possible (without handling cycles)") but with few of tweaks for the + TypeProfiler.jl's abstract interpretation. + +this is such an super fragile and problematic copy-and-paste; the points are: +1. to not let abstract interpretation to halt even after there is an obvious error point + found (, which is usually represented by `Bottom`-annotated type and triggers + early-loop-escape in the native implementation), and allow TP to collect as much errors + as possible +2. to keep traced types of (virtual) global variables in toplevel frames (, which are + technically wrapped into a single virtual function, but originally globals) in the + `TPInterpreter.virtualglobalvartable` so that they can be referred across profiling on + different virtual functions + +so the actual diffs are: +- the first argument changed to `TPInterpreter` +- comment out 3 early-loop-escapes +- do "virtual global variable assignment" for toplevel frames + +!!! danger "FIXME:" + ... and actually this introduces one obvious bug that `typeinf_local(::TPInterpreter, ::InferenceState)` + sometimes runs the same inference routine twice, which leads to duplicated error reports +""" +function typeinf_local(::TPInterpreter, ::InferenceState) end push_inithook!() do Core.eval(Core.Compiler, quote @@ -231,6 +252,10 @@ function typeinf_local(interp::$(TPInterpreter), frame::InferenceState) lhs = stmt.args[1] if isa(lhs, Slot) changes = StateUpdate(lhs, VarState(t, false), changes) + # virtual global variable assignment + if $(istoplevelframe)(frame) + $(setvirtualglobalvar!)(interp, frame, lhs, t) + end end elseif hd === :method fname = stmt.args[1] diff --git a/src/abstractinterpreterinterface.jl b/src/abstractinterpreterinterface.jl index 080961c4f..68746f0d8 100644 --- a/src/abstractinterpreterinterface.jl +++ b/src/abstractinterpreterinterface.jl @@ -1,4 +1,5 @@ -# define `AbstractInterpreter` API +# `AbstractInterpreter` API +# ------------------------- struct TPInterpreter <: AbstractInterpreter native::NativeInterpreter @@ -7,15 +8,21 @@ struct TPInterpreter <: AbstractInterpreter compress::Bool discard_trees::Bool + # TypeProfiler.jl specific + istoplevel::Bool + virtualglobalvartable::Dict{Module,Dict{Symbol,Any}} # maybe we don't need this nested dicts + function TPInterpreter(world::UInt = get_world_counter(); inf_params::InferenceParams = InferenceParams(), opt_params::OptimizationParams = OptimizationParams(), optimize::Bool = false, compress::Bool = false, - discard_trees::Bool = false + discard_trees::Bool = false, + istoplevel::Bool = false, + virtualglobalvartable::AbstractDict = Dict() ) native = NativeInterpreter(world; inf_params, opt_params) - return new(native, [], optimize, compress, discard_trees) + return new(native, [], optimize, compress, discard_trees, istoplevel, virtualglobalvartable) end end @@ -41,3 +48,19 @@ end may_optimize(interp::TPInterpreter) = interp.optimize may_compress(interp::TPInterpreter) = interp.compress may_discard_trees(interp::TPInterpreter) = interp.discard_trees + +# TypeProfiler.jl specific +# ------------------------ + +istoplevel(interp::TPInterpreter) = interp.istoplevel +function getvirtualglobalvar(interp, mod, sym) + haskey(interp.virtualglobalvartable, mod) || return nothing + return get(interp.virtualglobalvartable[mod], sym, nothing) +end +function setvirtualglobalvar!(interp, frame, lhs::Slot, @nospecialize(rhs)) + mod = frame.mod + haskey(interp.virtualglobalvartable, mod) || (interp.virtualglobalvartable[mod] = Dict()) + + sym = frame.src.slotnames[lhs.id] + interp.virtualglobalvartable[mod][sym] = rhs +end diff --git a/src/print.jl b/src/print.jl index 652324807..e32d02a40 100644 --- a/src/print.jl +++ b/src/print.jl @@ -50,7 +50,6 @@ end # -------- function print_reports(io::IO, - filename::AbstractString, reports::Vector{<:ToplevelErrorReport}, @nospecialize(postprocess = identity); print_toplevel_sucess::Bool = false, @@ -68,8 +67,7 @@ function print_reports(io::IO, color = ERROR_COLOR s = with_bufferring(arg) do io - s = string(pluralize(length(reports), "toplevel error"), " found in ", - (fullpath ? tofullpath : identity)(filename)) + s = string(pluralize(length(reports), "toplevel error"), " found") printlnstyled(io, LEFT_ROOF, s, RIGHT_ROOF; color = HEADER_COLOR) rail = with_bufferring(arg) do io @@ -104,7 +102,6 @@ print_report(io, report::ActualErrorWrapped) = showerror(io, report.err, report. # --------- function print_reports(io::IO, - filename::AbstractString, reports::Vector{<:InferenceErrorReport}, @nospecialize(postprocess = identity); filter_native_remarks::Bool = true, @@ -126,8 +123,7 @@ function print_reports(io::IO, end s = with_bufferring(:color => color) do io - s = string(pluralize(length(reports), "possible error"), " found in ", - (fullpath ? tofullpath : identity)(filename)) + s = string(pluralize(length(reports), "possible error"), " found") printlnstyled(io, LEFT_ROOF, s, RIGHT_ROOF; color = HEADER_COLOR) wrote_linfos = Set{UInt64}() @@ -146,7 +142,15 @@ end # `typeinf_local` in src/abstractinterpretation.jl function uniquify_reports!(reports::Vector{<:InferenceErrorReport}) return unique!(reports) do report - return last(report.st), report.msg, report.sig # uniquify keys + # uniquify keys + return ( + # caller + first(report.st), + # error + last(report.st), + report.msg, + report.sig + ) end end diff --git a/src/profile.jl b/src/profile.jl index 8b2a3b5c6..3a3768b8c 100644 --- a/src/profile.jl +++ b/src/profile.jl @@ -16,7 +16,7 @@ function profile_file(io::IO, mod::Module = Main; kwargs...) text = read(filename, String) - profile_text(io, text, filename, mod; kwargs...) + return profile_text(io, text, filename, mod; kwargs...) end profile_file(args...; kwargs...) = profile_file(stdout, args...; kwargs...) @@ -26,8 +26,8 @@ function profile_text(io::IO, mod::Module = Main; profiling_logger::Union{Nothing,IO} = nothing, kwargs...) - reports, postprocess = report_errors(profiling_logger, mod, text, filename) - return print_reports(io, filename, reports, postprocess; kwargs...) + included_files, reports, postprocess = report_errors(profiling_logger, mod, text, filename) + return included_files, print_reports(io, reports, postprocess; kwargs...) end profile_text(args...; kwargs...) = profile_text(stdout, args...; kwargs...) @@ -48,15 +48,25 @@ function report_errors(actualmod, text, filename) virtualmod = generate_virtual_module(actualmod) ret = parse_and_transform(actualmod, virtualmod, text, filename) - if isa(ret, Vector{<:ToplevelErrorReport}) - return ret, generate_postprocess(virtualmod, actualmod) + + included_files = map(a->a.filename, ret) + reportsary = map(a->a.reports, ret) + # non-empty `reports` means critical errors happened during the AST transformation + if any(!isempty, reportsary) + reports = collect(flatten(filter!(!isempty, reportsary))) + return included_files, reports, generate_postprocess(virtualmod, actualmod) end - λ = generate_virtual_lambda(virtualmod, ret) - # Core.eval(@__MODULE__, :(λ = $(λ))) - interp, = profile_call(λ) + # run profiler on each included file, otherwise file/line number information gets messed + # we can manually fix them, but we will need this kind of logic in the future module + # handling as well, so let's take this way + λs = generate_virtual_lambda.(Ref(virtualmod), map(a->a.transformed, ret)) + interp = TPInterpreter(; istoplevel = true) + for λ in λs + profile_call_gf!(interp, Tuple{typeof(λ)}) + end - return interp.reports, generate_postprocess(virtualmod, actualmod) + return included_files, interp.reports, generate_postprocess(virtualmod, actualmod) end generate_virtual_module(actualmod::Module) = @@ -81,18 +91,6 @@ end # inference # --------- -@nospecialize - -function profile_call(f, args...) - tt = to_tuple_type(typeof′.([f, args...])) - return profile_call_gf(tt) -end - -typeof′(x) = typeof(x) -typeof′(x::Type{T}) where {T} = Type{T} - -@specialize - # FIXME: # cached method specializations won't let abstract interpretation to happen again and # so may suppress error reports @@ -137,6 +135,18 @@ function profile_call_gf!(interp::TPInterpreter, end # utility for interactive session +@nospecialize + +function profile_call(f, args...) + tt = to_tuple_type(typeof′.([f, args...])) + return profile_call_gf(tt) +end + +typeof′(x) = typeof(x) +typeof′(x::Type{T}) where {T} = Type{T} + +@specialize + macro profile_call(ex, kwargs...) @assert Meta.isexpr(ex, :call) "function call expression should be given" f = ex.args[1] diff --git a/src/virtualprocess.jl b/src/virtualprocess.jl index 885645a7c..b90661173 100644 --- a/src/virtualprocess.jl +++ b/src/virtualprocess.jl @@ -1,20 +1,34 @@ # TODO: -# profiling on unloaded package -# - support `module` -# - special case `__init__`/`include` calls +# - profiling on unloaded package +# * support `module` +# * special case `__init__` calls +# - respect evaluation order; currently `parse_and_transform` evaluates "toplevel definitions" +# in batch but it misses errors because of "not-yet-defined" definitions + +const Transformed = let + sym2typ = ( + :filename => String, + :transformed => Expr, + :reports => Vector{ToplevelErrorReport} + ) + NamedTuple{first.(sym2typ), to_tuple_type(last.(sym2typ))} +end """ parse_and_transform(actualmod::Module, virtualmod::Module, s::AbstractString, filename::AbstractString, - ) -> Union{Expr,Vector{<:ToplevelErrorReport}} + ret::Vector{Transformed} = Transformed[], + ) -> Vector{Transformed} Parses `s` into a toplevel expression and transforms the resulting expression so that the - final output expression can be wrapped into a virtual function to be profiled in - `virtualmod`. -Returns `Vector{<:ToplevelErrorReport}` if there are any error found during the text parsing - and AST transformation. + output expression can be wrapped into a virtual function to be profiled in `virtualmod`. + +Returns `Vector{Transformed}` where `Transformed`-element keeps the following information: +- `filename`: file that has been parsed +- `transformed`: transformed `:toplevel` expression +- `reports`: errors found during the text parsing and AST transformation The AST transformation includes: - try to extract toplevel "defintions" and directly evaluate them in a context of @@ -23,6 +37,7 @@ The AST transformation includes: * `macro` definition * `struct`, `abstract` and `primitive` type definition * `import`/`using` statements + * handle `include` - try to expand macros in a context of `virtualmod` - fix self-referring dot accessors (i.e. those referring to `actualmod`) so that it can be constant-propagated in abstract interpretation (otherwise they will be annotated as `Any` @@ -33,21 +48,26 @@ function parse_and_transform(actualmod::Module, virtualmod::Module, s::AbstractString, filename::AbstractString, - )::Union{Expr,Vector{<:ToplevelErrorReport}} + ret::Vector{Transformed} = Transformed[], + )::Vector{Transformed} ex = parse_input_line(s; filename) # if there's any syntax error, try to identify all the syntax error location - isexpr(ex, (:error, :incomplete)) && return collect_syntax_errors(s, filename) + if isexpr(ex, (:error, :incomplete)) + reports = collect_syntax_errors(s, filename) + push!(ret, (; filename, transformed = ex, reports)) + return ret + end @assert isexpr(ex, :toplevel) reports::Vector{ToplevelErrorReport} = ToplevelErrorReport[] line::Int = 1 - file::String = filename + filename::String = filename function macroexpand_err_handler(err, st) # `4` corresponds to `with_err_handling`, `f`, `macroexpand` and its kwfunc st = crop_stacktrace(st, 4) - push!(reports, ActualErrorWrapped(err, st, file, line)) + push!(reports, ActualErrorWrapped(err, st, filename, line)) return nothing end macroexpand_with_err_handling(mod, x) = return with_err_handling(macroexpand_err_handler) do @@ -56,24 +76,23 @@ function parse_and_transform(actualmod::Module, function eval_err_handler(err, st) # `3` corresponds to `with_err_handling`, `f` and `eval` st = crop_stacktrace(st, 3) - push!(reports, ActualErrorWrapped(err, st, file, line)) + push!(reports, ActualErrorWrapped(err, st, filename, line)) return nothing end eval_with_err_handling(mod, x) = return with_err_handling(eval_err_handler) do Core.eval(mod, x) end - # defined constant self-referring variable so that global references using it can be + # define constant self-referring variable so that global references using it can be # constant propagated actualmodsym = Symbol(actualmod) constmodsym = gensym(actualmodsym) Core.eval(virtualmod, :(const $(constmodsym) = $(actualmodsym))) - ret = walk_and_transform!(ex, Symbol[]) do x, scope - # update file/line info + function walker(x, scope) + # update line info if x isa LineNumberNode line = x.line - # file = x.file # NOTE: will be needed when this function handles `include` calls return x end @@ -84,56 +103,78 @@ function parse_and_transform(actualmod::Module, # always escape inside expression if :quote in scope - x + return x + end # evaluate these toplevel expressions only when not in function scope: # we need this because otherwise these invalid expressions can be "extracted" and # evaled wrongly while they actually cause syntax errors - elseif isexpr(x, (:macro, :abstract, :struct, :primitive)) - leftover = if :function ∉ scope + if isexpr(x, (:macro, :abstract, :struct, :primitive)) + return if :function ∉ scope eval_with_err_handling(virtualmod, x) else - report = SyntaxErrorReport("syntax: \"$(x.head)\" expression not at top level", file, line) + report = SyntaxErrorReport("syntax: \"$(x.head)\" expression not at top level", filename, line) push!(reports, report) nothing end - :($(leftover)) + end # TODO: enable profiling on module (i.e. without actual loading, combined with `include` support) - elseif isexpr(x, (:import, :using)) - leftover = if :function ∉ scope + if isexpr(x, (:import, :using)) + return if :function ∉ scope eval_with_err_handling(virtualmod, x) else - report = SyntaxErrorReport("syntax: \"$(x.head)\" expression not at top level", file, line) + report = SyntaxErrorReport("syntax: \"$(x.head)\" expression not at top level", filename, line) push!(reports, report) nothing end - :($(leftover)) + end # hoist toplevel function definitions - elseif !islocalscope(scope) && isfuncdef(x) - leftover = eval_with_err_handling(virtualmod, x) - :($(leftover)) + if !islocalscope(scope) && isfuncdef(x) + return eval_with_err_handling(virtualmod, x) + end # remove `const` annotation - elseif isexpr(x, :const) - first(x.args) + if isexpr(x, :const) + return first(x.args) + end + + # handle `include` call + if isinclude(x) + # can't correctly eval expressions with accessing global variables, etc + include_file = eval_with_err_handling(virtualmod, last(x.args)) + + # error happened when evaling + isnothing(include_file) && return nothing + + include_file = normpath(dirname(filename), include_file) + if isfile(include_file) + include_text = read(include_file, String) + + parse_and_transform(actualmod, virtualmod, include_text, include_file, ret) + + # try to get last expression + include_transformed = last(ret).transformed + isexpr(include_transformed, :toplevel) || return nothing + return last(include_transformed.args) + else + error("implement error report for this pass") # TODO + end + end # fix self-referring global references - elseif isexpr(x, :.) && first(x.args) === actualmodsym + if isexpr(x, :.) && first(x.args) === actualmodsym x.args[1] = constmodsym - x - - else - x + return x end - end - return if isempty(reports) - ret - else - reports # non-empty `reports` means critical errors happened + return x end + + transformed = walk_and_transform!(walker, ex, Symbol[]) + push!(ret, (; filename, transformed, reports)) + return ret end function collect_syntax_errors(s, filename) @@ -189,6 +230,8 @@ function isfuncdef(ex) return false end +isinclude(ex) = isexpr(ex, :call) && first(ex.args) === :include + # don't inline this so we can find it in the stacktrace @noinline function with_err_handling(f, err_handler) return try diff --git a/src/watch.jl b/src/watch.jl index 7bad05d5b..adbcdef67 100644 --- a/src/watch.jl +++ b/src/watch.jl @@ -2,44 +2,42 @@ profile_and_watch_file(args...; kwargs...) = profile_and_watch_file(stdout, args function profile_and_watch_file(io::IO, filename::AbstractString, args...; - profiling_logger::Union{Nothing,IO} = io, - print_inference_sucess::Bool = true, + profiling_logger::Union{Nothing,IO} = io, # enable logger by default for watch mode kwargs...) - errored = true while true - do_prehooks(errored, - io, + do_prehooks(io, filename, args...; profiling_logger, - print_inference_sucess, kwargs...) - try + included_files, _ = try println(io) - errored = profile_file(io, filename, args...; - profiling_logger, - # no success message after profiling without error found - # unless specified manually - print_inference_sucess = print_inference_sucess && errored, - kwargs...) + profile_file(io, filename, args...; + profiling_logger, + kwargs...) catch err @error "internal error occured:" showerror(stderr, err, catch_backtrace()) + [filename], true end - t = watch_file(filename) + # TODO: is there a way to abort tasks that are defeated in the race ? + watch_chn = Channel{FileWatching.FileEvent}() + for file in included_files + @async put!(watch_chn, watch_file(file)) + end + take!(watch_chn) # wait until any of the scheduled tasks fulfilled + if !isfile(filename) @info "$(filename) has been removed" return end - do_posthooks(errored, - io, + do_posthooks(io, filename, args...; profiling_logger, - print_inference_sucess, kwargs...) end end