From 852ba080531b3020b4703897a22003641c7c89c0 Mon Sep 17 00:00:00 2001 From: Andreas Noack Date: Fri, 24 Mar 2023 10:08:12 +0100 Subject: [PATCH 1/5] Reduce allocations --- src/Loess.jl | 54 +++++++++++++++++++++------------------------------- src/kd.jl | 7 ++++--- 2 files changed, 26 insertions(+), 35 deletions(-) diff --git a/src/Loess.jl b/src/Loess.jl index a6f1483..6f0197e 100644 --- a/src/Loess.jl +++ b/src/Loess.jl @@ -153,49 +153,39 @@ end # Returns: # A length n' vector of predicted response values. # -function predict(model::LoessModel, z::Real) - predict(model, [z]) -end - -function predict(model::LoessModel, zs::AbstractVector) - - Base.require_one_based_indexing(zs) +function predict(model::LoessModel{T}, z::Number) where T + adjacent_verts = traverse(model.kdtree, (T(z),)) - m = size(model.xs, 2) + @assert(length(adjacent_verts) == 2) + v₁, v₂ = adjacent_verts[1][1], adjacent_verts[2][1] - # in the univariate case, interpret a non-singleton zs as vector of - # ponits, not one point - if m == 1 && length(zs) > 1 - return predict(model, reshape(zs, (length(zs), 1))) + if z == v₁ || z == v₂ + return first(model.predictions_and_gradients[[z]]) end - if length(zs) != m - error("$(m)-dimensional model applied to length $(length(zs)) vector") - end - - adjacent_verts = traverse(model.kdtree, zs) + y₁, dy₁ = model.predictions_and_gradients[[v₁]] + y₂, dy₂ = model.predictions_and_gradients[[v₂]] - if m == 1 - @assert(length(adjacent_verts) == 2) - z = zs[1] - v₁, v₂ = adjacent_verts[1][1], adjacent_verts[2][1] + b_int = cubic_interpolation(v₁, y₁, dy₁, v₂, y₂, dy₂) - if z == v₁ || z == v₂ - return first(model.predictions_and_gradients[[z]]) - end + return evalpoly(z, b_int) +end - y₁, dy₁ = model.predictions_and_gradients[[v₁]] - y₂, dy₂ = model.predictions_and_gradients[[v₂]] +function predict(model::LoessModel, zs::AbstractVector) + if size(model.xs, 2) > 1 + throw(ArgumentError("Multivariate blending not yet implemented")) + end - b_int = cubic_interpolation(v₁, y₁, dy₁, v₂, y₂, dy₂) + predict.(Ref(model), zs) +end - return evalpoly(z, b_int) - else - error("Multivariate blending not yet implemented") +function predict(model::LoessModel, zs::AbstractMatrix) + if size(model.xs, 2) > 1 + throw(ArgumentError("Multivariate blending not yet implemented")) end -end -predict(model::LoessModel, zs::AbstractMatrix) = map(Base.Fix1(predict, model), eachrow(zs)) + return [predict(model, z) for z in vec(zs)] +end """ tricubic(u) diff --git a/src/kd.jl b/src/kd.jl index f010927..3f49aa9 100644 --- a/src/kd.jl +++ b/src/kd.jl @@ -246,14 +246,15 @@ end Traverse the tree `kdtree` to the bottom and return the verticies of the bounding hypercube of the leaf node containing the point `x`. """ -function traverse(kdtree::KDTree, x::AbstractVector) +function traverse(kdtree::KDTree{T}, x::NTuple{N,T}) where {N,T} + m = size(kdtree.bounds, 2) - if length(x) != m + if N != m throw(DimensionMismatch("$(m)-dimensional kd-tree searched with a length $(length(x)) vector.")) end - for j in 1:m + for j in 1:N if x[j] < kdtree.bounds[1, j] || x[j] > kdtree.bounds[2, j] error( """ From 54e822eaa2cc7738cb4873439089805180a79be7 Mon Sep 17 00:00:00 2001 From: Andreas Noack Date: Wed, 19 Apr 2023 14:14:24 +0200 Subject: [PATCH 2/5] Type stability --- src/Loess.jl | 10 +++++----- src/kd.jl | 38 +++++++++++++++++++++----------------- 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/src/Loess.jl b/src/Loess.jl index 6f0197e..a42d301 100644 --- a/src/Loess.jl +++ b/src/Loess.jl @@ -10,11 +10,11 @@ export loess, predict include("kd.jl") -mutable struct LoessModel{T <: AbstractFloat} - xs::AbstractMatrix{T} # An n by m predictor matrix containing n observations from m predictors - ys::AbstractVector{T} # A length n response vector +struct LoessModel{T <: AbstractFloat, N <: KDNode} + xs::Matrix{T} # An n by m predictor matrix containing n observations from m predictors + ys::Vector{T} # A length n response vector predictions_and_gradients::Dict{Vector{T}, Vector{T}} # kd-tree vertexes mapped to prediction and gradient at each vertex - kdtree::KDTree{T} + kdtree::KDTree{T, N} end """ @@ -128,7 +128,7 @@ function loess( ] end - LoessModel{T}(xs, ys, predictions_and_gradients, kdtree) + LoessModel(xs, ys, predictions_and_gradients, kdtree) end loess(xs::AbstractVector{T}, ys::AbstractVector{T}; kwargs...) where {T<:AbstractFloat} = diff --git a/src/kd.jl b/src/kd.jl index 3f49aa9..40e17af 100644 --- a/src/kd.jl +++ b/src/kd.jl @@ -5,18 +5,18 @@ abstract type KDNode end struct KDLeafNode <: KDNode end -struct KDInternalNode{T <: AbstractFloat} <: KDNode +struct KDInternalNode{T <: AbstractFloat, LN <: KDNode, RN <: KDNode} <: KDNode j::Int # dimension on which the data is split med::T # median value where the split occours - leftnode::KDNode - rightnode::KDNode + leftnode::LN + rightnode::RN end -struct KDTree{T <: AbstractFloat} - xs::AbstractMatrix{T} # A matrix of n, m-dimensional observations +struct KDTree{T <: AbstractFloat, N <: KDNode} + xs::Matrix{T} # A matrix of n, m-dimensional observations perm::Vector{Int} # permutation of data to avoid modifying xs - root::KDNode # root node + root::N # root node verts::Set{Vector{T}} bounds::Matrix{T} # Top-level bounding box end @@ -226,7 +226,7 @@ function build_kdtree(xs::AbstractMatrix{T}, push!(verts, T[vert...]) end - KDInternalNode{T}(j, med, leftnode, rightnode) + KDInternalNode(j, med, leftnode, rightnode) end @@ -256,6 +256,7 @@ function traverse(kdtree::KDTree{T}, x::NTuple{N,T}) where {N,T} for j in 1:N if x[j] < kdtree.bounds[1, j] || x[j] > kdtree.bounds[2, j] + @show x, kdtree.bounds error( """ Loess cannot perform extrapolation. Predict can only be applied @@ -267,15 +268,18 @@ function traverse(kdtree::KDTree{T}, x::NTuple{N,T}) where {N,T} bounds = copy(kdtree.bounds) node = kdtree.root - while !isa(node, KDLeafNode) - if x[node.j] <= node.med - bounds[2, node.j] = node.med - node = node.leftnode - else - bounds[1, node.j] = node.med - node = node.rightnode - end - end - bounds_verts(bounds) + return _traverse!(bounds, node, x) end + +_traverse!(bounds, node::KDLeafNode, x) = bounds +function _traverse!(bounds, node::KDInternalNode, x) + if x[node.j] <= node.med + bounds[2, node.j] = node.med + return _traverse!(bounds, node.leftnode, x) + else + bounds[1, node.j] = node.med + return _traverse!(bounds, node.rightnode, x) + end +end + From 543d0c8f1071d48774169c812e794e4dda7e000d Mon Sep 17 00:00:00 2001 From: Andreas Noack Date: Wed, 19 Apr 2023 22:48:11 +0200 Subject: [PATCH 3/5] Compure Euclidean distance in a loop to avoid allocations --- src/Loess.jl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/Loess.jl b/src/Loess.jl index a42d301..58bb0b0 100644 --- a/src/Loess.jl +++ b/src/Loess.jl @@ -80,8 +80,12 @@ function loess( end # distance to each point - for i in 1:n - ds[i] = euclidean(vec(vert), vec(xs[i,:])) + @inbounds for i in 1:n + s = zero(T) + for j in 1:m + s += (xs[i, j] - vert[j])^2 + end + ds[i] = sqrt(s) end # find the q closest points From c342866d1f54a2d6d414ca7e62b87a4c00b415fd Mon Sep 17 00:00:00 2001 From: Andreas Noack Date: Thu, 20 Apr 2023 08:27:03 +0200 Subject: [PATCH 4/5] Address review comments --- src/Loess.jl | 18 +++++++++++++----- src/kd.jl | 1 - 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/Loess.jl b/src/Loess.jl index 58bb0b0..893a25b 100644 --- a/src/Loess.jl +++ b/src/Loess.jl @@ -44,6 +44,10 @@ function loess( degree::Integer = 2, cell::AbstractFloat = 0.2 ) where T<:AbstractFloat + + Base.require_one_based_indexing(xs) + Base.require_one_based_indexing(ys) + if size(xs, 1) != size(ys, 1) throw(DimensionMismatch("Predictor and response arrays must of the same length")) end @@ -177,18 +181,22 @@ end function predict(model::LoessModel, zs::AbstractVector) if size(model.xs, 2) > 1 - throw(ArgumentError("Multivariate blending not yet implemented")) + throw(ArgumentError("multivariate blending not yet implemented")) end - predict.(Ref(model), zs) + return [predict(model, z) for z in zs] end function predict(model::LoessModel, zs::AbstractMatrix) - if size(model.xs, 2) > 1 - throw(ArgumentError("Multivariate blending not yet implemented")) + if size(model.xs, 2) != size(zs, 2) + throw(DimensionMismatch("number of columns in input matrix must match the number of columns in the model matrix")) end - return [predict(model, z) for z in vec(zs)] + if size(zs, 2) == 1 + return predict(model, vec(zs)) + else + return [predict(model, row) for row in eachrow(zs)] + end end """ diff --git a/src/kd.jl b/src/kd.jl index 40e17af..d101a7b 100644 --- a/src/kd.jl +++ b/src/kd.jl @@ -256,7 +256,6 @@ function traverse(kdtree::KDTree{T}, x::NTuple{N,T}) where {N,T} for j in 1:N if x[j] < kdtree.bounds[1, j] || x[j] > kdtree.bounds[2, j] - @show x, kdtree.bounds error( """ Loess cannot perform extrapolation. Predict can only be applied From 2569c9b1cf5d69bc1f8731aed7fe510860a16e43 Mon Sep 17 00:00:00 2001 From: Andreas Noack Date: Wed, 3 May 2023 22:21:46 +0200 Subject: [PATCH 5/5] Simplify tree representation by using nothing instead of a special leaf type --- src/Loess.jl | 4 ++-- src/kd.jl | 25 ++++++++++--------------- 2 files changed, 12 insertions(+), 17 deletions(-) diff --git a/src/Loess.jl b/src/Loess.jl index 893a25b..d78e97d 100644 --- a/src/Loess.jl +++ b/src/Loess.jl @@ -10,11 +10,11 @@ export loess, predict include("kd.jl") -struct LoessModel{T <: AbstractFloat, N <: KDNode} +struct LoessModel{T <: AbstractFloat} xs::Matrix{T} # An n by m predictor matrix containing n observations from m predictors ys::Vector{T} # A length n response vector predictions_and_gradients::Dict{Vector{T}, Vector{T}} # kd-tree vertexes mapped to prediction and gradient at each vertex - kdtree::KDTree{T, N} + kdtree::KDTree{T} end """ diff --git a/src/kd.jl b/src/kd.jl index d101a7b..9a4dd1c 100644 --- a/src/kd.jl +++ b/src/kd.jl @@ -1,22 +1,17 @@ # Simple static kd-trees. -abstract type KDNode end - -struct KDLeafNode <: KDNode -end - -struct KDInternalNode{T <: AbstractFloat, LN <: KDNode, RN <: KDNode} <: KDNode +struct KDNode{T <: AbstractFloat} j::Int # dimension on which the data is split med::T # median value where the split occours - leftnode::LN - rightnode::RN + leftnode::Union{Nothing, KDNode{T}} + rightnode::Union{Nothing, KDNode{T}} end -struct KDTree{T <: AbstractFloat, N <: KDNode} +struct KDTree{T <: AbstractFloat} xs::Matrix{T} # A matrix of n, m-dimensional observations perm::Vector{Int} # permutation of data to avoid modifying xs - root::N # root node + root::KDNode{T} # root node verts::Set{Vector{T}} bounds::Matrix{T} # Top-level bounding box end @@ -114,7 +109,7 @@ Modifies: `perm`, `verts` Returns: - Either a `KDLeafNode` or a `KDInternalNode` + Either a `nothing` or a `KDNode` """ function build_kdtree(xs::AbstractMatrix{T}, perm::AbstractVector, @@ -130,7 +125,7 @@ function build_kdtree(xs::AbstractMatrix{T}, if length(perm) <= leaf_size_cutoff || diameter(bounds) <= leaf_diameter_cutoff @debug "Creating leaf node" length(perm) leaf_size_cutoff diameter(bounds) leaf_diameter_cutoff - return KDLeafNode() + return nothing end # split on the dimension with the largest spread @@ -226,7 +221,7 @@ function build_kdtree(xs::AbstractMatrix{T}, push!(verts, T[vert...]) end - KDInternalNode(j, med, leftnode, rightnode) + KDNode(j, med, leftnode, rightnode) end @@ -271,8 +266,8 @@ function traverse(kdtree::KDTree{T}, x::NTuple{N,T}) where {N,T} return _traverse!(bounds, node, x) end -_traverse!(bounds, node::KDLeafNode, x) = bounds -function _traverse!(bounds, node::KDInternalNode, x) +_traverse!(bounds, node::Nothing, x) = bounds +function _traverse!(bounds, node::KDNode, x) if x[node.j] <= node.med bounds[2, node.j] = node.med return _traverse!(bounds, node.leftnode, x)