diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 33aea4f..4f56ea3 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -32,7 +32,6 @@ jobs: git fetch origin +:refs/remotes/origin/HEAD julia --project=benchmark/ -e ' using Pkg - Pkg.develop(PackageSpec(path=pwd())) Pkg.instantiate()' # Pkg.update() allows us to benchmark even when dependencies/compat requirements change julia --project=benchmark/ -e ' diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7cd138e..e9862d5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -16,8 +16,9 @@ jobs: fail-fast: false matrix: version: - - '1.6' # Replace this with the minimum Julia version that your package supports. E.g. if your package requires Julia 1.5 or higher, change this to '1.5'. - - '1' # Leave this line unchanged. '1' will automatically expand to the latest stable 1.x release of Julia. + - 'min' + - 'lts' + - '1' - 'pre' os: - ubuntu-latest diff --git a/Project.toml b/Project.toml index d536a9e..02ebd23 100644 --- a/Project.toml +++ b/Project.toml @@ -7,12 +7,15 @@ Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0" +StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" [compat] Distances = "0.7, 0.8, 0.9, 0.10" +LinearAlgebra = "1" Statistics = "1" StatsAPI = "1.1" -julia = "1.6" +StatsFuns = "1" +julia = "1.8" [extras] RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b" diff --git a/benchmark/Project.toml b/benchmark/Project.toml index e276858..05cae9d 100644 --- a/benchmark/Project.toml +++ b/benchmark/Project.toml @@ -7,4 +7,7 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" [compat] BenchmarkTools = "1" PkgBenchmark = "0.2" -julia = "1" +julia = "1.11" + +[sources] +Loess = { path = ".." 
} diff --git a/src/Loess.jl b/src/Loess.jl index 7e65b74..a635cff 100644 --- a/src/Loess.jl +++ b/src/Loess.jl @@ -4,6 +4,7 @@ import Distances: euclidean import StatsAPI: fitted, modelmatrix, predict, residuals, response using Statistics, LinearAlgebra +using StatsFuns: tdistinvcdf export loess, fitted, modelmatrix, predict, residuals, response @@ -11,9 +12,18 @@ include("kd.jl") struct LoessModel{T <: AbstractFloat} - xs::Matrix{T} # An n by m predictor matrix containing n observations from m predictors - ys::Vector{T} # A length n response vector - predictions_and_gradients::Dict{Vector{T}, Vector{T}} # kd-tree vertexes mapped to prediction and gradient at each vertex + # An n by m predictor matrix containing n observations from m predictors + xs::Matrix{T} + # A length n response vector + ys::Vector{T} + # kd-tree vertexes mapped to prediction and gradient at each vertex + predictions_and_gradients::Dict{Vector{T}, Vector{T}} + # kd-tree vertexes mapped to generating vectors for prediction (first row) + # and gradient (second row) at each vertex. The values values of the + # predictions_and_gradients field are the values of hatmatrix_generator + # multiplied by ys + hatmatrix_generator::Dict{Vector{T}, Matrix{T}} + # The kd-tree kdtree::KDTree{T} end @@ -61,8 +71,10 @@ function _loess( # Initialize the regression arrays us = Array{T}(undef, q, 1 + degree * m) - du1dt = zeros(T, m, 1 + degree * m) vs = Array{T}(undef, q) + # The hat matrix and the derivative at the vertices of kdtree + F = Dict{Vector{T},Matrix{T}}() + for vert in kdtree.verts # reset perm @@ -84,46 +96,56 @@ function _loess( dmax = maximum([ds[perm[i]] for i = 1:q]) dmax = iszero(dmax) ? one(dmax) : dmax + # We center the predictors since this will greatly simplify subsequent + # calculations. With centering, the intercept is the prediction at the + # the vertex and the derivative is linear coefficient. + # + # We still only support m == 1 so we hard code for m=1 below. 
If support + # for more predictors is introduced then this code would have to be adjusted. + # + # See Cleveland and Grosse page 54 column 1. The results assume centering + # but I'm not sure, the centering is explicitly stated in the paper. + x₁ = xs[perm[1], 1] for i in 1:q pᵢ = perm[i] w = sqrt(tricubic(ds[pᵢ] / dmax)) us[i, 1] = w - for j in 1:m - x = xs[pᵢ, j] + for j in 1:m # we still only support m == 1 + x = xs[pᵢ, j] - x₁ # center wxl = w for l in 1:degree wxl *= x us[i, 1 + (j - 1)*degree + l] = wxl # w*x^l end end - vs[i] = ys[pᵢ] * w + vs[i] = ys[pᵢ] end - # Compute the gradient of the vertex - pᵢ = perm[1] - for j in 1:m - x = xs[pᵢ, j] - xl = one(x) - for l in 1:degree - du1dt[j, 1 + (j - 1)*degree + l] = l * xl - xl *= x - end - end + Fact = svd(us) - if VERSION < v"1.7.0-DEV.1188" - F = qr(us, Val(true)) - else - F = qr(us, ColumnNorm()) - end - coefs = F\vs + # compute the inverse of the singular values with thresholding similar + # to pinv + Sinv = map!(t -> t > eps(one(t)) * q ? inv(t) : zero(t), Fact.S, Fact.S) + # Compute S⁻¹ * Uᵗ * W in place + Fact.U .*= view(us, :, 1) .* Sinv' + # Finalize the pseudo inverse + Ftmp = Fact.V * Fact.U' + FF = zeros(T, 2, n) + FF[:, view(perm, 1:q)] = view(Ftmp, 1:2, :) + F[vert] = FF + + coefs = Ftmp * vs - predictions_and_gradients[vert] = [ - us[1, :]' * coefs; # the prediction - du1dt * coefs # the gradient of the prediction - ] + predictions_and_gradients[vert] = coefs[1:2] end - LoessModel(convert(Matrix{T}, xs), convert(Vector{T}, ys), predictions_and_gradients, kdtree) + LoessModel( + convert(Matrix{T}, xs), + convert(Vector{T}, ys), + predictions_and_gradients, + F, + kdtree, + ) end """ @@ -179,14 +201,14 @@ end # Returns: # A length n' vector of predicted response values. 
 #
-function predict(model::LoessModel{T}, z::Number) where T
-    adjacent_verts = traverse(model.kdtree, (T(z),))
+function predict(model::LoessModel{T}, x::Number) where T
+    adjacent_verts = traverse(model.kdtree, (T(x),))
 
     @assert(length(adjacent_verts) == 2)
 
     v₁, v₂ = adjacent_verts[1][1], adjacent_verts[2][1]
 
-    if z == v₁ || z == v₂
-        return first(model.predictions_and_gradients[[z]])
+    if x == v₁ || x == v₂
+        return first(model.predictions_and_gradients[[x]])
     end
 
     y₁, dy₁ = model.predictions_and_gradients[[v₁]]
@@ -194,26 +216,92 @@ function predict(model::LoessModel{T}, z::Number) where T
 
     b_int = cubic_interpolation(v₁, y₁, dy₁, v₂, y₂, dy₂)
 
-    return evalpoly(z, b_int)
+    return evalpoly(x, b_int)
+end
+
+struct LoessPrediction{T}
+    predictions::Vector{T}
+    lower::Vector{T}
+    upper::Vector{T}
+    sₓ::Vector{T}
+    δ₁::T
+    δ₂::T
+    s::T
+    ρ::T
 end
 
-function predict(model::LoessModel, zs::AbstractVector)
+"""
+    predict(
+        model::LoessModel,
+        x::AbstractVecOrMat = modelmatrix(model);
+        interval::Union{Symbol, Nothing}=nothing,
+        level::Real=0.95
+    )
+
+Compute predictions from the fitted Loess `model` at the values in `x`.
+By default, only the predictions are returned. If `interval` is set to
+`:confidence` then a `LoessPrediction` struct is returned which contains
+upper and lower confidence bounds at `level` as well as the quantities
+used for computing the confidence bounds.
+
+When the predictor takes values at the edges of the KD tree, the predictions
+are already computed and the function is simply a look-up. For other values
+of the predictor, a cubic spline approximation is used for the prediction.
+
+When confidence bounds are requested, a matrix of size ``n \\times n`` is
+constructed where ``n`` is the length of the fitted data vector. Hence,
+this calculation is only feasible when ``n`` is not too large. For details
+on the calculations, see the cited reference.
+
+Reference: Cleveland, William S., and Eric Grosse.
"Computational methods +for local regression." Statistics and computing 1, no. 1 (1991): 47-62. +""" +function predict( + model::LoessModel, + x::AbstractVecOrMat = modelmatrix(model); + interval::Union{Symbol, Nothing}=nothing, + level::Real=0.95 +) if size(model.xs, 2) > 1 throw(ArgumentError("multivariate blending not yet implemented")) end + # FIXME! Adjust this when multivariate predictors are supported + x_v = vec(x) - return [predict(model, z) for z in zs] -end + if interval !== nothing && interval !== :confidence + if interval === :prediction + throw(ArgumentError("predictions not implemented. If you know how to compute them then please file an issue and explain how.")) + end + throw(ArgumentError(LazyString("interval must be either :prediction or :confidence but was :", interval))) + end -function predict(model::LoessModel, zs::AbstractMatrix) - if size(model.xs, 2) != size(zs, 2) - throw(DimensionMismatch("number of columns in input matrix must match the number of columns in the model matrix")) + if !(0 < level < 1) + throw(ArgumentError(LazyString("level must be between zero and one but was ", level))) end - if size(zs, 2) == 1 - return predict(model, vec(zs)) + predictions = [predict(model, _x) for _x in x_v] + + if interval === nothing + return predictions else - return [predict(model, row) for row in eachrow(zs)] + # see Cleveland and Grosse 1991 p.50. 
+ L = hatmatrix(model) + L̄ = L - I + L̄L̄ = L̄' * L̄ + δ₁ = tr(L̄L̄) + δ₂ = sum(abs2, L̄L̄) + ε̂ = L̄ * model.ys + s = sqrt(sum(abs2, ε̂) / δ₁) + ρ = δ₁^2 / δ₂ + qt = tdistinvcdf(ρ, (1 + level) / 2) + sₓ = if model.xs === x + [s * norm(view(L, i, :)) for i in 1:length(x)] + else + [s * norm(_hatmatrix_x(model, _x)) for _x in x_v] + end + lower = muladd(-qt, sₓ, predictions) + upper = muladd(qt, sₓ, predictions) + return LoessPrediction(predictions, lower, upper, sₓ, δ₁, δ₂, s, ρ) end end @@ -221,6 +309,40 @@ fitted(model::LoessModel) = predict(model, modelmatrix(model)) residuals(model::LoessModel) = response(model) .- fitted(model) +function hatmatrix(model::LoessModel{T}) where T + n = length(model.ys) + L = zeros(T, n, n) + for (i, r) in enumerate(eachrow(model.xs)) + z = only(r) # we still only support one predictor + L[i, :] = _hatmatrix_x(model, z) + end + return L +end + +# Compute elements of the hat matrix of `model` at `x`. +# See Cleveland and Grosse 1991 p. 54. +function _hatmatrix_x(model::LoessModel{T}, x::Number) where T + n = length(model.ys) + + adjacent_verts = traverse(model.kdtree, (T(x),)) + + @assert(length(adjacent_verts) == 2) + v₁, v₂ = adjacent_verts[1], adjacent_verts[2] + + if x == v₁ || x == v₂ + Lx = model.hatmatrix_generator[[x]][1, :] + else + Lx = zeros(T, n) + Lv₁ = model.hatmatrix_generator[[v₁]] + Lv₂ = model.hatmatrix_generator[[v₂]] + for j in 1:n + b_int = cubic_interpolation(v₁, Lv₁[1, j], Lv₁[2, j], v₂, Lv₂[1, j], Lv₂[2, j]) + Lx[j] = evalpoly(x, b_int) + end + end + return Lx +end + """ tricubic(u) diff --git a/test/runtests.jl b/test/runtests.jl index 9bcfd9c..4ac773b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -58,7 +58,6 @@ end # not great tolerance but loess also struggles to capture the sine peaks @test abs(predict(model, i * π + π / 2 )) ≈ 0.9 atol=0.1 end - end @test_throws DimensionMismatch loess([1.0 2.0; 3.0 4.0], [1.0]) @@ -93,18 +92,31 @@ end @testset "predict" begin # In R this is `predict(cars.lo, 
data.frame(speed = seq(5, 25, 1)))`.
-    Rvals = [7.797353, 10.002308, 12.499786, 15.281082, 18.446568, 21.865315, 25.517015, 29.350386, 33.230660, 37.167935, 41.205226, 45.055736, 48.355889, 49.824812, 51.986702, 56.461318, 61.959729, 68.569313, 76.316068, 85.212121, 95.324047]
-
-    for (x, Ry) in zip(5:25, Rvals)
-        if 8 <= x <= 22
-            @test predict(ft, x) ≈ Ry rtol = 1e-7
-        else
-            # The outer vertices are expanded by 0.105 in the original implementation. Not sure if we
-            # want to do the same thing so meanwhile the results will deviate slightly between the
-            # outermost vertices
-            @test predict(ft, x) ≈ Ry rtol = 1e-3
-        end
+    R_fit = [7.797353, 10.002308, 12.499786, 15.281082, 18.446568, 21.865315, 25.517015, 29.350386, 33.230660, 37.167935, 41.205226, 45.055736, 48.355889, 49.824812, 51.986702, 56.461318, 61.959729, 68.569313, 76.316068, 85.212121, 95.324047]
+    R_se_fit = [7.568120, 5.945831, 4.990827, 4.545284, 4.308639, 4.115049, 3.789542, 3.716231, 3.776947, 4.091747, 4.709568, 4.245427, 4.035929, 3.753410, 4.004705, 4.043190, 4.026105, 4.074664, 4.570818, 5.954217, 8.302014]
+    preds = predict(ft, 5:25; interval = :confidence)
+    # The calculations of the δs in the code of Cleveland and Grosse are
+    # based on a statistical approximation, see section 4 of their 1991
+    # paper. We use the exact but expensive definition so the values
+    # are slightly different
+    @test preds.s ≈ 15.29496 rtol = 1e-3
+    @test preds.ρ ≈ 44.6179 rtol = 5e-3
+
+    for (i, Ry) in enumerate(R_fit)
+        # The outer vertices are expanded by 0.105 in the original implementation. Not sure if we
+        # want to do the same thing so meanwhile the results will deviate slightly between the
+        # outermost vertices
+        @test preds.predictions[i] ≈ Ry rtol = (4 <= i <= 18) ?
1e-7 : 1e-3 + @test preds.sₓ[i] ≈ R_se_fit[i] rtol = 1e-3 end + + preds_all = predict(ft; interval = :confidence) + @test preds_all.s ≈ 15.29496 rtol = 1e-3 + @test preds_all.ρ ≈ 44.6179 rtol = 5e-3 + @test preds_all.sₓ ≈ [9.885466, 9.885466, 4.990827, 4.990827, 4.545284, 4.308639, 4.115049, 4.115049, 4.115049, 3.789542, 3.789542, 3.716231, 3.716231, 3.716231, 3.716231, 3.776947, 3.776947, 3.776947, 3.776947, 4.091747, 4.091747, 4.091747, 4.091747, 4.709568, 4.709568, 4.709568, 4.245427, 4.245427, 4.035929, 4.035929, 4.035929, 3.753410, 3.753410, 3.753410, 3.753410, 4.004705, 4.004705, 4.004705, 4.043190, 4.043190, 4.043190, 4.043190, 4.043190, 4.074664, 4.570818, 5.954217, 5.954217, 5.954217, 5.954217, 8.302014] rtol = 1e-2 + + @test_throws ArgumentError predict(ft, 5:25; interval = :x) + @test_throws ArgumentError predict(ft, 5:25; interval = :prediction) end end