Extract core of Silverman bandwidth estimator to prepare multidimensional support

jmert · jmert · commit 6a284d46db4f · 2025-07-26T12:50:20.000-05:00
This does not yet enable a multidimensional estimator, but we'll need
the functionality to compute covariances in order to do so.
diff --git a/src/bandwidth.jl b/src/bandwidth.jl
@@ -1,3 +1,99 @@
+import LinearAlgebra: Symmetric, cholesky
+
+# Get the effective sample size and (co)variance simultaneously
+#
+#   - Calculate variance via Welford's algorithm:
+#
+#     https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford%27s_online_algorithm
+#
+#   - Calculate the effective sample size, based on weights and the bounds, using the
+#     Kish effective sample size definition:
+#
+#         n_eff = sum(weights)^2 / sum(weights .^ 2)
+#
+#     https://search.r-project.org/CRAN/refmans/svyweight/html/eff_n.html
+#     https://en.wikipedia.org/wiki/Weighted_arithmetic_mean#Reliability_weights
+function _neff_covar(coords::Tuple{Vararg{AbstractVector,N}},
+                     lo::Tuple{Vararg{Any,N}},
+                     hi::Tuple{Vararg{Any,N}},
+                     weights::Union{Nothing,<:AbstractVector}) where {N}
+    # General N-dimensional method: returns `(neff, Symmetric(Σ))` computed over the samples
+    # that satisfy `lo[jj] ≤ coords[jj][ii] ≤ hi[jj]` on every axis `jj`.
+    T = promote_type(map(_unitless∘eltype, coords)...)
+    wsum = wsqr = isnothing(weights) ? zero(T) : zero(eltype(weights))
+    x = zeros(T, N)     # current sample, after the `_invunit` scaling
+    μ = zeros(T, N)     # running weighted mean
+    μ₋₁ = zeros(T, N)   # running mean before the current sample was folded in
+    Σ = zeros(T, N, N)  # running weighted covariance; only the upper triangle is updated
+    I = isnothing(weights) ? eachindex(coords...) : eachindex(weights, coords...)
+    for ii in I
+        # Gather sample `ii` into `x`, and skip the sample if any axis is out of bounds.
+        indomain = true
+        for jj in 1:N
+            v = coords[jj][ii]
+            x[jj] = v * oneunit(_invunit(typeof(v)))
+            indomain &= lo[jj] ≤ v ≤ hi[jj]
+        end
+        !indomain && continue
+
+        w = isnothing(weights) ? one(wsum) : weights[ii]
+        # A zero weight contributes nothing; skipping it also avoids ω = 0/0 = NaN when
+        # no weight has accumulated yet, which would otherwise poison μ and Σ.
+        iszero(w) && continue
+        wsum += w
+        wsqr += w^2
+        ω = w / wsum
+        ω̄ = one(ω) - ω
+        # μ = ω̄ * μ₋₁ + ω * x
+        for jj in 1:N
+            μ₋₁[jj] = μ[jj]
+            μ[jj] = ω̄ * μ[jj] + ω * x[jj]
+        end
+        # Σ = ω̄ * Σ + ω * (x - μ) * (x - μ₋₁)',  with only the upper-triangle filled
+        for jj in 1:N
+            yy = x[jj] - μ₋₁[jj]
+            for kk in 1:jj
+                Σ[kk,jj] = ω̄ * Σ[kk, jj] + ω * (x[kk] - μ[kk]) * yy
+            end
+        end
+    end
+    neff = wsum^2 / wsqr  # Kish effective sample size
+    return neff, Symmetric(Σ)
+end
+
+# specialize for 1D case where the variance is scalar, so allocating arrays can be avoided
+function _neff_covar(coords::Tuple{AbstractVector},
+                     lo::Tuple{Any},
+                     hi::Tuple{Any},
+                     weights::Union{Nothing,<:AbstractVector})
+    T = promote_type(map(_unitless∘eltype, coords)...)
+    wsum = wsqr = isnothing(weights) ? zero(T) : zero(eltype(weights))
+    x = zero(T)
+    μ = zero(T)    # running weighted mean
+    μ₋₁ = zero(T)  # running mean before the current sample was folded in
+    Σ = zero(T)    # running weighted variance (scalar in the 1D case)
+    I = isnothing(weights) ? eachindex(coords...) : eachindex(weights, coords...)
+    for ii in I
+        v = coords[1][ii]
+        lo[1] ≤ v ≤ hi[1] || continue  # skip samples outside the closed bounds
+        x = v * oneunit(_invunit(typeof(v)))
+        w = isnothing(weights) ? one(wsum) : weights[ii]
+        # A zero weight contributes nothing; skipping it also avoids ω = 0/0 = NaN when
+        # no weight has accumulated yet, which would otherwise poison μ and Σ.
+        iszero(w) && continue
+        wsum += w
+        wsqr += w^2
+        ω = w / wsum
+        ω̄ = one(ω) - ω
+        μ₋₁ = μ
+        μ = ω̄ * μ + ω * x
+        Σ = ω̄ * Σ + ω * (x - μ) * (x - μ₋₁)
+    end
+    neff = wsum^2 / wsqr  # Kish effective sample size
+    return neff, Σ
+end
+
+
 """
     SilvermanBandwidth <: AbstractBandwidthEstimator
 
@@ -27,26 +123,8 @@ function bandwidth(::SilvermanBandwidth, v::AbstractVector{T},
                    lo::T, hi::T, ::Boundary.T;
                    weights::Union{Nothing, <:AbstractVector} = nothing
                    ) where {T}
-    # Get the effective sample size and variance simultaneously
-    #   - See note in init() about neffective calculation
-    #   - Calculate variance via Welford's algorithm:
-    #     https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford%27s_online_algorithm
-    wsum = zero(_unitless(T))
-    wsqr = zero(wsum)
-    μ = μ₋₁ = zero(T)
-    σ² = zero(T)^2  # unitful numbers require correct squaring
-    I = isnothing(weights) ? eachindex(v) : eachindex(v, weights)
-    for ii in I
-        x = @inbounds v[ii]
-        w = isnothing(weights) ? one(wsum) : @inbounds weights[ii]
-        lo ≤ x ≤ hi || continue  # skip out-of-bounds elements
-        wsum += w
-        wsqr += w^2
-        ω = w / wsum
-        μ₋₁, μ = μ, (one(T) - ω) * μ + ω * x
-        σ² = (one(T) - ω) * σ² + ω * (x - μ) * (x - μ₋₁)
-    end
-    neff = wsum^2 / wsqr
+    neff, Σ = _neff_covar((v,), (lo,), (hi,), weights)
+    σ = sqrt(Σ) * oneunit(eltype(v))
 
     # From Hansen (2009) — https://users.ssc.wisc.edu/~bhansen/718/NonParametrics1.pdf
     # for a Gaussian kernel:
@@ -56,10 +134,11 @@ function bandwidth(::SilvermanBandwidth, v::AbstractVector{T},
     # - Section 2.9, letting ν = 2:
     #   - bw = σ̂ n^(-1/5) C₂(k)
     #     C₂(k) = 2 ( 8R(k)√π / 96κ₂² )^(1/5) == (4/3)^(1/5)
-    return iszero(σ²) ? eps(oneunit(T)) :
-        sqrt(σ²) * (oftype(one(T), (4one(T) / 3)) / neff)^(one(T) / 5)
+    return iszero(σ) ? eps(oneunit(T)) :
+        σ * (oftype(one(T), (4one(T) / 3)) / neff)^(one(T) / 5)
 end
 
+
 module ISJ
     # An implementation of Brent's method, translated from the algorithm described in
     #   https://en.wikipedia.org/wiki/Brent%27s_method
diff --git a/test/Project.toml b/test/Project.toml
@@ -3,6 +3,7 @@ CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
 KernelDensityEstimation = "e46ec5f4-66dc-4371-a668-81bd92d19d7d"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
diff --git a/test/kde.jl b/test/kde.jl
@@ -1,6 +1,7 @@
 using .KDE: estimate
 
 using FFTW
+using LinearAlgebra: I
 using Statistics: std, var
 using Random: Random, randn
 using Unitful
@@ -409,6 +410,68 @@ end
     @testset "Silverman Bandwidth" begin
         σ = rv_norm_σ
 
+        @testset "Variance Estimation" begin
+            # Check the implementation of the (co)variance and effective sample size
+            # calculation performed by `KDE._neff_covar`
+            v = view(rv_norm_long, 1:1_000_000)
+            v1 = (v,)
+            lo = (-10σ,)  # bounds at ±10σ; the `neff == length(v)` checks below expect
+            hi = (+10σ,)  # every sample to fall inside them
+            neff1, var1 = @inferred KDE._neff_covar(v1, lo, hi, nothing)
+            @test neff1 == length(v)
+            @test var1 ≈ σ^2 atol = 16.0 / sqrt(neff1)
+            # weights = nothing must give exactly the same result as uniform (all ones)
+            neff2, var2 = @inferred KDE._neff_covar(v1, lo, hi, fill(1.0, length(v)))
+            @test neff2 == neff1
+            @test var2 == var1
+
+            # 1D is special-cased, so it should not allocate. Zero allocations are only
+            #   asserted on Julia ≥ 1.12.0-beta3; older versions record the check as broken.
+            if VERSION >= v"1.12.0-beta3"
+                @test (@allocated KDE._neff_covar(v1, lo, hi, nothing)) == 0
+            else
+                @test_broken (@allocated KDE._neff_covar(v1, lo, hi, nothing)) == 0
+            end
+            # 1D is special cased — `invoke` the general method and verify that it matches
+            gensig = Tuple{#=coords=# Tuple{Vararg{AbstractVector,N}},
+                           #=lo=# Tuple{Vararg{Any,N}},
+                           #=hi=# Tuple{Vararg{Any,N}},
+                           #=weights=# Nothing
+                           } where N
+            neff3, var3 = invoke(KDE._neff_covar, gensig, v1, lo, hi, nothing)
+            @test neff3 == neff1
+            @test var3 isa AbstractMatrix{Float64}
+            @test only(var3) == var1
+
+
+            # multidimensional covariances (these calls dispatch to the general method)
+
+            # using the same data multiple times is perfect correlation, so all entries
+            # in the covariance matrix are identical (and equal to the variance of the data)
+            #   2x2
+            neff, covar = KDE._neff_covar((v, v), -10σ.*(1,1), 10σ.*(1,1), nothing)
+            @test neff == length(v)
+            @test covar == var1 .* ones(2, 2)
+            #   3x3
+            neff, covar = KDE._neff_covar((v, v, v), -10σ.*(1,1,1), 10σ.*(1,1,1), nothing)
+            @test neff == length(v)
+            @test covar == var1 .* ones(3, 3)
+            # circularly-shifting one of the inputs decorrelates the inputs, so then the
+            # covariance matrix should be approximately diagonal
+            w = circshift(v, -1)
+            #   2x2
+            neff, covar = KDE._neff_covar((v, w), -10σ.*(1,1), 10σ.*(1,1), nothing)
+            @test neff == length(v)
+            @test covar ≈ var1 * I  atol=16/sqrt(length(v))
+            #   3x3 (not diagonal: the two copies of `v` stay perfectly correlated)
+            neff, covar = KDE._neff_covar((v, v, w), -10σ.*(1,1,1), 10σ.*(1,1,1), nothing)
+            @test neff == length(v)
+            Σ = [1 1 0
+                 1 1 0
+                 0 0 1] .* var1
+            @test covar ≈ Σ  atol=16/sqrt(length(v))
+        end
+
         # Test that the estimator approximately matches the asymptotic behavior for a
         # the known Gaussian distribution behavior.
         #   N.B. use very large numbers to reduce sample variance