diff --git a/src/Loess.jl b/src/Loess.jl index a6f1483..d78e97d 100644 --- a/src/Loess.jl +++ b/src/Loess.jl @@ -10,9 +10,9 @@ export loess, predict include("kd.jl") -mutable struct LoessModel{T <: AbstractFloat} - xs::AbstractMatrix{T} # An n by m predictor matrix containing n observations from m predictors - ys::AbstractVector{T} # A length n response vector +struct LoessModel{T <: AbstractFloat} + xs::Matrix{T} # An n by m predictor matrix containing n observations from m predictors + ys::Vector{T} # A length n response vector predictions_and_gradients::Dict{Vector{T}, Vector{T}} # kd-tree vertexes mapped to prediction and gradient at each vertex kdtree::KDTree{T} end @@ -44,6 +44,10 @@ function loess( degree::Integer = 2, cell::AbstractFloat = 0.2 ) where T<:AbstractFloat + + Base.require_one_based_indexing(xs) + Base.require_one_based_indexing(ys) + if size(xs, 1) != size(ys, 1) throw(DimensionMismatch("Predictor and response arrays must of the same length")) end @@ -80,8 +84,12 @@ function loess( end # distance to each point - for i in 1:n - ds[i] = euclidean(vec(vert), vec(xs[i,:])) + @inbounds for i in 1:n + s = zero(T) + for j in 1:m + s += (xs[i, j] - vert[j])^2 + end + ds[i] = sqrt(s) end # find the q closest points @@ -128,7 +136,7 @@ function loess( ] end - LoessModel{T}(xs, ys, predictions_and_gradients, kdtree) + LoessModel(xs, ys, predictions_and_gradients, kdtree) end loess(xs::AbstractVector{T}, ys::AbstractVector{T}; kwargs...) where {T<:AbstractFloat} = @@ -153,50 +161,44 @@ end # Returns: # A length n' vector of predicted response values. # -function predict(model::LoessModel, z::Real) - predict(model, [z]) -end - -function predict(model::LoessModel, zs::AbstractVector) - - Base.require_one_based_indexing(zs) +function predict(model::LoessModel{T}, z::Number) where T + adjacent_verts = traverse(model.kdtree, (T(z),)) - m = size(model.xs, 2) + @assert(length(adjacent_verts) == 2) + v₁, v₂ = adjacent_verts[1][1], adjacent_verts[2][1] - # in the univariate case, interpret a non-singleton zs as vector of - # ponits, not one point - if m == 1 && length(zs) > 1 - return predict(model, reshape(zs, (length(zs), 1))) + if z == v₁ || z == v₂ + return first(model.predictions_and_gradients[[z]]) end - if length(zs) != m - error("$(m)-dimensional model applied to length $(length(zs)) vector") - end + y₁, dy₁ = model.predictions_and_gradients[[v₁]] + y₂, dy₂ = model.predictions_and_gradients[[v₂]] - adjacent_verts = traverse(model.kdtree, zs) + b_int = cubic_interpolation(v₁, y₁, dy₁, v₂, y₂, dy₂) - if m == 1 - @assert(length(adjacent_verts) == 2) - z = zs[1] - v₁, v₂ = adjacent_verts[1][1], adjacent_verts[2][1] + return evalpoly(z, b_int) +end - if z == v₁ || z == v₂ - return first(model.predictions_and_gradients[[z]]) - end +function predict(model::LoessModel, zs::AbstractVector) + if size(model.xs, 2) > 1 + throw(ArgumentError("multivariate blending not yet implemented")) + end - y₁, dy₁ = model.predictions_and_gradients[[v₁]] - y₂, dy₂ = model.predictions_and_gradients[[v₂]] + return [predict(model, z) for z in zs] +end - b_int = cubic_interpolation(v₁, y₁, dy₁, v₂, y₂, dy₂) +function predict(model::LoessModel, zs::AbstractMatrix) + if size(model.xs, 2) != size(zs, 2) + throw(DimensionMismatch("number of columns in input matrix must match the number of columns in the model matrix")) + end - return evalpoly(z, b_int) + if size(zs, 2) == 1 + return predict(model, vec(zs)) else - error("Multivariate blending not yet implemented") + return [predict(model, row) for row in eachrow(zs)] end end -predict(model::LoessModel, zs::AbstractMatrix) = map(Base.Fix1(predict, model), eachrow(zs)) - """ tricubic(u) diff --git a/src/kd.jl b/src/kd.jl index f010927..9a4dd1c 100644 --- a/src/kd.jl +++ b/src/kd.jl @@ -1,22 +1,17 @@ # Simple static kd-trees. -abstract type KDNode end - -struct KDLeafNode <: KDNode -end - -struct KDInternalNode{T <: AbstractFloat} <: KDNode +struct KDNode{T <: AbstractFloat} j::Int # dimension on which the data is split med::T # median value where the split occours - leftnode::KDNode - rightnode::KDNode + leftnode::Union{Nothing, KDNode{T}} + rightnode::Union{Nothing, KDNode{T}} end struct KDTree{T <: AbstractFloat} - xs::AbstractMatrix{T} # A matrix of n, m-dimensional observations + xs::Matrix{T} # A matrix of n, m-dimensional observations perm::Vector{Int} # permutation of data to avoid modifying xs - root::KDNode # root node + root::KDNode{T} # root node verts::Set{Vector{T}} bounds::Matrix{T} # Top-level bounding box end @@ -114,7 +109,7 @@ Modifies: `perm`, `verts` Returns: - Either a `KDLeafNode` or a `KDInternalNode` + Either a `nothing` or a `KDNode` """ function build_kdtree(xs::AbstractMatrix{T}, perm::AbstractVector, @@ -130,7 +125,7 @@ function build_kdtree(xs::AbstractMatrix{T}, if length(perm) <= leaf_size_cutoff || diameter(bounds) <= leaf_diameter_cutoff @debug "Creating leaf node" length(perm) leaf_size_cutoff diameter(bounds) leaf_diameter_cutoff - return KDLeafNode() + return nothing end # split on the dimension with the largest spread @@ -226,7 +221,7 @@ function build_kdtree(xs::AbstractMatrix{T}, push!(verts, T[vert...]) end - KDInternalNode{T}(j, med, leftnode, rightnode) + KDNode(j, med, leftnode, rightnode) end @@ -246,14 +241,15 @@ end Traverse the tree `kdtree` to the bottom and return the verticies of the bounding hypercube of the leaf node containing the point `x`. """ -function traverse(kdtree::KDTree, x::AbstractVector) +function traverse(kdtree::KDTree{T}, x::NTuple{N,T}) where {N,T} + m = size(kdtree.bounds, 2) - if length(x) != m + if N != m throw(DimensionMismatch("$(m)-dimensional kd-tree searched with a length $(length(x)) vector.")) end - for j in 1:m + for j in 1:N if x[j] < kdtree.bounds[1, j] || x[j] > kdtree.bounds[2, j] error( """ @@ -266,15 +262,18 @@ function traverse(kdtree::KDTree, x::AbstractVector) bounds = copy(kdtree.bounds) node = kdtree.root - while !isa(node, KDLeafNode) - if x[node.j] <= node.med - bounds[2, node.j] = node.med - node = node.leftnode - else - bounds[1, node.j] = node.med - node = node.rightnode - end - end - bounds_verts(bounds) + return _traverse!(bounds, node, x) +end + +_traverse!(bounds, node::Nothing, x) = bounds +function _traverse!(bounds, node::KDNode, x) + if x[node.j] <= node.med + bounds[2, node.j] = node.med + return _traverse!(bounds, node.leftnode, x) + else + bounds[1, node.j] = node.med + return _traverse!(bounds, node.rightnode, x) + end end +