1+ import LinearAlgebra: Symmetric, cholesky
2+
3+ # Get the effective sample size and (co)variance simultaneously
4+ #
5+ # - Calculate variance via Welford's algorithm:
6+ #
7+ # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford%27s_online_algorithm
8+ #
9+ # - Calculate the effective sample size, based on weights and the bounds, using the
10+ # Kish effective sample size definition:
11+ #
12+ # n_eff = sum(weights)^2 / sum(weights .^ 2)
13+ #
14+ # https://search.r-project.org/CRAN/refmans/svyweight/html/eff_n.html
15+ # https://en.wikipedia.org/wiki/Weighted_arithmetic_mean#Reliability_weights
# N-dimensional method: single pass over the samples accumulating the Kish effective
# sample size and the weighted covariance matrix.
#
# Arguments:
# - `coords`:  tuple of N coordinate vectors (one per dimension), indexed in lockstep.
# - `lo`, `hi`: per-dimension inclusive bounds; a sample contributes only if
#   `lo[j] ≤ coords[j][i] ≤ hi[j]` holds for every dimension `j`.
# - `weights`: per-sample weights, or `nothing` to weight every sample equally.
#
# Returns `(neff, Σ)`:
# - `neff = wsum^2 / wsqr` over the contributing samples. If no sample contributes this
#   is the ratio 0/0 (NaN for the usual real number types).
# - `Σ` is a `Symmetric` wrapper around an N×N array of which only the upper triangle is
#   filled. The covariance is weight-normalized; no small-sample correction is applied.
#   Values are accumulated after scaling by `oneunit(_invunit(typeof(v)))`
#   (presumably this strips physical units — confirm against `_invunit`).
#
# Samples whose weight is exactly zero are skipped: they carry no information, and if one
# occurred before any positive weight the update factor `w / wsum` would be 0/0 = NaN,
# contaminating every subsequent mean and covariance update.
function _neff_covar(coords::Tuple{Vararg{AbstractVector,N}},
                     lo::Tuple{Vararg{Any,N}},
                     hi::Tuple{Vararg{Any,N}},
                     weights::Union{Nothing,<:AbstractVector}) where {N}
    T = promote_type(map(_unitless ∘ eltype, coords)...)
    # running sum of weights and of squared weights
    wsum = wsqr = isnothing(weights) ? zero(T) : zero(eltype(weights))

    x = zeros(T, N)      # current sample
    μ = zeros(T, N)      # running mean including the current sample
    μ₋₁ = zeros(T, N)    # running mean before the current sample
    Σ = zeros(T, N, N)   # running covariance (upper triangle only)
    inds = isnothing(weights) ? eachindex(coords...) : eachindex(weights, coords...)
    for ii in inds
        # The bounds test uses the raw value `v`, while `x` stores the rescaled value.
        indomain = true
        for jj in 1:N
            v = coords[jj][ii]
            x[jj] = v * oneunit(_invunit(typeof(v)))
            indomain &= lo[jj] ≤ v ≤ hi[jj]
        end
        indomain || continue

        w = isnothing(weights) ? one(wsum) : weights[ii]
        iszero(w) && continue  # avoid 0/0 when no weight has been accumulated yet
        wsum += w
        wsqr += w^2
        ω = w / wsum
        ω̄ = one(ω) - ω

        # μ₋₁ = μ;  μ = ω̄ * μ₋₁ + ω * x
        for jj in 1:N
            μ₋₁[jj] = μ[jj]
            μ[jj] = ω̄ * μ[jj] + ω * x[jj]
        end
        # Σ = ω̄ * Σ + ω * (x - μ) * (x - μ₋₁)', filling only the upper triangle
        for jj in 1:N
            yy = x[jj] - μ₋₁[jj]
            for kk in 1:jj
                Σ[kk, jj] = ω̄ * Σ[kk, jj] + ω * (x[kk] - μ[kk]) * yy
            end
        end
    end
    neff = wsum^2 / wsqr
    return neff, Symmetric(Σ)
end
63+
64+ # specialize for 1D case where the variance is scalar, so allocating arrays can be avoided
# One-dimensional method: same single-pass accumulation of the Kish effective sample size
# and the weighted variance, but with scalar state so no arrays are allocated.
#
# Arguments:
# - `coords`:  1-tuple holding the vector of samples.
# - `lo`, `hi`: 1-tuples holding the inclusive bounds; samples outside
#   `lo[1] ≤ v ≤ hi[1]` are ignored.
# - `weights`: per-sample weights, or `nothing` to weight every sample equally.
#
# Returns `(neff, Σ)` with `neff = wsum^2 / wsqr` over the contributing samples (the
# ratio 0/0 if none contribute) and `Σ` the scalar weight-normalized variance of the
# values after scaling by `oneunit(_invunit(typeof(v)))` (presumably unit stripping —
# confirm against `_invunit`).
#
# Samples whose weight is exactly zero are skipped: they carry no information, and if one
# occurred before any positive weight the update factor `w / wsum` would be 0/0 = NaN,
# contaminating every subsequent mean and variance update.
function _neff_covar(coords::Tuple{AbstractVector},
                     lo::Tuple{Any},
                     hi::Tuple{Any},
                     weights::Union{Nothing,<:AbstractVector})
    T = promote_type(map(_unitless ∘ eltype, coords)...)
    # running sum of weights and of squared weights
    wsum = wsqr = isnothing(weights) ? zero(T) : zero(eltype(weights))

    μ = zero(T)   # running mean
    Σ = zero(T)   # running variance
    inds = isnothing(weights) ? eachindex(coords...) : eachindex(weights, coords...)
    for ii in inds
        v = coords[1][ii]
        lo[1] ≤ v ≤ hi[1] || continue  # skip out-of-bounds samples
        x = v * oneunit(_invunit(typeof(v)))

        w = isnothing(weights) ? one(wsum) : weights[ii]
        iszero(w) && continue  # avoid 0/0 when no weight has been accumulated yet
        wsum += w
        wsqr += w^2
        ω = w / wsum
        ω̄ = one(ω) - ω

        μ₋₁ = μ                      # mean before this sample
        μ = ω̄ * μ₋₁ + ω * x          # mean including this sample
        Σ = ω̄ * Σ + ω * (x - μ) * (x - μ₋₁)
    end
    neff = wsum^2 / wsqr
    return neff, Σ
end
95+
96+
197"""
298 SilvermanBandwidth <: AbstractBandwidthEstimator
399
@@ -27,26 +123,8 @@ function bandwidth(::SilvermanBandwidth, v::AbstractVector{T},
27123 lo:: T , hi:: T , :: Boundary.T ;
28124 weights:: Union{Nothing, <:AbstractVector} = nothing
29125 ) where {T}
30- # Get the effective sample size and variance simultaneously
31- # - See note in init() about neffective calculation
32- # - Calculate variance via Welford's algorithm:
33- # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford%27s_online_algorithm
34- wsum = zero (_unitless (T))
35- wsqr = zero (wsum)
36- μ = μ₋₁ = zero (T)
37- σ² = zero (T)^ 2 # unitful numbers require correct squaring
38- I = isnothing (weights) ? eachindex (v) : eachindex (v, weights)
39- for ii in I
40- x = @inbounds v[ii]
41- w = isnothing (weights) ? one (wsum) : @inbounds weights[ii]
42- lo ≤ x ≤ hi || continue # skip out-of-bounds elements
43- wsum += w
44- wsqr += w^ 2
45- ω = w / wsum
46- μ₋₁, μ = μ, (one (T) - ω) * μ + ω * x
47- σ² = (one (T) - ω) * σ² + ω * (x - μ) * (x - μ₋₁)
48- end
49- neff = wsum^ 2 / wsqr
126+ neff, Σ = _neff_covar ((v,), (lo,), (hi,), weights)
127+ σ = sqrt (Σ) * oneunit (eltype (v))
50128
51129 # From Hansen (2009) — https://users.ssc.wisc.edu/~bhansen/718/NonParametrics1.pdf
52130 # for a Gaussian kernel:
@@ -56,10 +134,11 @@ function bandwidth(::SilvermanBandwidth, v::AbstractVector{T},
56134 # - Section 2.9, letting ν = 2:
57135 # - bw = σ̂ n^(-1/5) C₂(k)
58136 # C₂(k) = 2 ( 8R(k)√π / 96κ₂² )^(1/5) == (4/3)^(1/5)
59- return iszero (σ² ) ? eps (oneunit (T)) :
60- sqrt (σ²) * (oftype (one (T), (4 one (T) / 3 )) / neff)^ (one (T) / 5 )
137+ return iszero (σ) ? eps (oneunit (T)) :
138+ σ * (oftype (one (T), (4 one (T) / 3 )) / neff)^ (one (T) / 5 )
61139end
62140
141+
63142module ISJ
64143 # An implementation of Brent's method, translated from the algorithm described in
65144 # https://en.wikipedia.org/wiki/Brent%27s_method
0 commit comments