Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 10 additions & 7 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ version = "0.3.22"
[deps]
CPUSummary = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
HyperDualNumbers = "50ceba7f-c3ee-5a84-a6e8-3ad40456ec97"
IfElse = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173"
LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
ManualMemory = "d125e4d3-2237-4719-b19c-fa641b8a4667"
Expand All @@ -16,9 +17,16 @@ StaticArrayInterface = "0d7ed370-da01-4f52-bd93-41d350b8b718"
ThreadingUtilities = "8290d209-cae3-49c0-8002-c8c24d57dab5"
VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"

[weakdeps]
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"

[extensions]
ForwardDiffExt = "ForwardDiff"

[compat]
CPUSummary = "0.1.26, 0.2.1"
ForwardDiff = "0.10"
HyperDualNumbers = "4"
IfElse = "0.1"
LoopVectorization = "0.12.86"
ManualMemory = "0.1.1"
Expand All @@ -30,13 +38,11 @@ ThreadingUtilities = "0.5"
VectorizationBase = "0.21.15"
julia = "1.6"

[extensions]
ForwardDiffExt = "ForwardDiff"

[extras]
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
HyperDualNumbers = "50ceba7f-c3ee-5a84-a6e8-3ad40456ec97"
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
Expand All @@ -45,7 +51,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"

[targets]
test = ["Aqua", "BenchmarkTools", "ForwardDiff", "InteractiveUtils", "LinearAlgebra", "LoopVectorization", "Random", "VectorizationBase", "Test"]

[weakdeps]
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
test = ["Aqua", "BenchmarkTools", "ForwardDiff", "HyperDualNumbers", "InteractiveUtils", "LinearAlgebra", "LoopVectorization", "Random", "VectorizationBase", "Test"]
233 changes: 233 additions & 0 deletions ext/HyperDualNumbersExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
module HyperDualNumbersExt

using HyperDualNumbers: Hyper
using Octavian: ArrayInterface,
@turbo, @tturbo,
One, Zero,
indices, static
import Octavian: real_rep, _matmul!, _matmul_serial!

# Expose an array of hyperdual numbers as an array of their underlying real
# type `T`, with the components of each element laid out along a new leading
# dimension (no data is copied).
function real_rep(a::AbstractArray{DualT}) where {T,DualT<:Hyper{T}}
    return reinterpret(reshape, T, a)
end
# Non-copying slice of `B` at index 1 of its leading dimension.
_view1(B::AbstractMatrix) = view(B, 1, :)
_view1(B::AbstractArray{<:Any,3}) = view(B, 1, :, :)

for AbstractVectorOrMatrix in (:AbstractVector, :AbstractMatrix)
# Left-multiply a hyperdual vector/matrix `_B` by a plain matrix `A`, storing
# `α * A * _B + β * _C` into `_C`. Each component of the hyperdual entries is
# treated as an independent real right-hand side; the loop nest runs under
# `@tturbo`. `nthread`, `MKN` and `contig_axis` are accepted but not used here.
# Returns `_C`.
@eval function _matmul!(
    _C::$(AbstractVectorOrMatrix){DualT},
    A::AbstractMatrix,
    _B::$(AbstractVectorOrMatrix){DualT},
    α,
    β = Zero(),
    nthread::Nothing = nothing,
    MKN = nothing,
    contig_axis = nothing
) where {T,DualT<:Hyper{T}}
    Cr = real_rep(_C)
    Br = real_rep(_B)

    @tturbo for col ∈ indices((Cr, Br), 3),
        row ∈ indices((Cr, A), (2, 1)),
        comp ∈ indices((Cr, Br), 1)

        acc = zero(eltype(Cr))
        for k ∈ indices((A, Br), 2)
            acc += A[row, k] * Br[comp, k, col]
        end
        Cr[comp, row, col] = α * acc + β * Cr[comp, row, col]
    end

    return _C
end

# Right-multiply a hyperdual matrix `_A` by a plain vector/matrix `B`, storing
# `α * _A * B + β * _C` into `_C`. When both `_C` and `_A` are dense and
# column-major they are reinterpreted as flat real arrays and handed to the
# ordinary real `_matmul!`; otherwise an explicit `@tturbo` loop nest over the
# real representations is used. `MKN` is accepted but not used here.
# Returns `_C`.
@eval @inline function _matmul!(
    _C::$(AbstractVectorOrMatrix){DualT},
    _A::AbstractMatrix{DualT},
    B::$(AbstractVectorOrMatrix),
    α = One(),
    β = Zero(),
    nthread::Nothing = nothing,
    MKN = nothing
) where {T,DualT<:Hyper{T}}
    flat_ok =
        Bool(ArrayInterface.is_dense(_C)) &&
        Bool(ArrayInterface.is_column_major(_C)) &&
        Bool(ArrayInterface.is_dense(_A)) &&
        Bool(ArrayInterface.is_column_major(_A))

    if !flat_ok
        # generic layout: work on the real representation with a leading
        # component dimension
        Ar = real_rep(_A)
        Cr = real_rep(_C)

        @tturbo for col ∈ indices((Cr, B), (3, 2)),
            row ∈ indices((Cr, Ar), 2),
            comp ∈ indices((Cr, Ar), 1)

            acc = zero(eltype(Cr))
            for k ∈ indices((Ar, B), (3, 1))
                acc += Ar[comp, row, k] * B[k, col]
            end
            Cr[comp, row, col] = α * acc + β * Cr[comp, row, col]
        end
        return _C
    end

    # contiguous layout: no reshape needed, defer to the standard method
    Aflat = reinterpret(T, _A)
    Cflat = reinterpret(T, _C)
    _matmul!(Cflat, Aflat, B, α, β, nthread, nothing)
    return _C
end

# Product of two hyperdual arrays: stores `α * _A * _B + β * _C` into `_C`.
#
# Writing a = a₀ + a₁ε₁ + a₂ε₂ + a₁₂ε₁ε₂ (and likewise b), the product is
#   a*b = a₀b₀ + (a₁b₀ + a₀b₁)ε₁ + (a₂b₀ + a₀b₂)ε₂
#         + (a₁₂b₀ + a₀b₁₂ + a₁b₂ + a₂b₁)ε₁ε₂
# The real representations are indexed [component, row, col]; this assumes the
# `Hyper` field order (value, ε₁, ε₂, ε₁ε₂). The result is built in three
# passes:
#   1. every component of `A` times the value part of `B`, scaled by `α`, with
#      `β * C` folded in;
#   2. the value part of `A` times each non-value component of `B`;
#   3. the mixed first-order terms a₁b₂ + a₂b₁, which contribute only to the
#      ε₁ε₂ component.
#
# `MKN` and `contig` are accepted but not used here. Returns `_C`.
@eval @inline function _matmul!(
    _C::$(AbstractVectorOrMatrix){DualT},
    _A::AbstractMatrix{DualT},
    _B::$(AbstractVectorOrMatrix){DualT},
    α = One(),
    β = Zero(),
    nthread::Nothing = nothing,
    MKN = nothing,
    contig = nothing
) where {T,DualT<:Hyper{T}}
    A = real_rep(_A)
    C = real_rep(_C)
    B = real_rep(_B)

    # Pass 1: C[l, m, n] = α * Σₖ A[l, m, k] * B[1, k, n] + β * C[l, m, n]
    if Bool(ArrayInterface.is_dense(_C)) &&
       Bool(ArrayInterface.is_column_major(_C)) &&
       Bool(ArrayInterface.is_dense(_A)) &&
       Bool(ArrayInterface.is_column_major(_A))
        # we can avoid the reshape and call the standard method
        Ar = reinterpret(T, _A)
        Cr = reinterpret(T, _C)
        _matmul!(Cr, Ar, _view1(B), α, β, nthread, nothing)
    else
        # we cannot use the standard method directly
        @tturbo for n ∈ indices((C, B), 3),
            m ∈ indices((C, A), 2),
            l in indices((C, A), 1)

            Cₗₘₙ = zero(eltype(C))
            for k ∈ indices((A, B), (3, 2))
                Cₗₘₙ += A[l, m, k] * B[1, k, n]
            end
            C[l, m, n] = α * Cₗₘₙ + β * C[l, m, n]
        end
    end

    # Pass 2: value part of A against the ε₁, ε₂ and ε₁ε₂ parts of B
    @tturbo for n ∈ indices((B, C), 3), m ∈ indices((A, C), 2), p ∈ 1:3
        Cₚₘₙ = zero(eltype(C))
        for k ∈ indices((A, B), (3, 2))
            Cₚₘₙ += A[1, m, k] * B[p+1, k, n]
        end
        C[p+1, m, n] = C[p+1, m, n] + α * Cₚₘₙ
    end

    # Pass 3: mixed terms a₁b₂ + a₂b₁ of the ε₁ε₂ coefficient. Without them
    # the fourth component of the result is wrong for any operands whose
    # first-order parts are non-zero.
    @tturbo for n ∈ indices((B, C), 3), m ∈ indices((A, C), 2)
        Cₘₙ = zero(eltype(C))
        for k ∈ indices((A, B), (3, 2))
            Cₘₙ += A[2, m, k] * B[3, k, n] + A[3, m, k] * B[2, k, n]
        end
        C[4, m, n] = C[4, m, n] + α * Cₘₙ
    end

    return _C
end

# Single-threaded counterpart of the plain-matrix × hyperdual product: stores
# `α * A * _B + β * _C` into `_C` using a `@turbo` loop nest over the real
# representations of `_B` and `_C`. `MKN` is accepted but not used here.
# Returns `_C`.
@eval function _matmul_serial!(
    _C::$(AbstractVectorOrMatrix){DualT},
    A::AbstractMatrix,
    _B::$(AbstractVectorOrMatrix){DualT},
    α,
    β,
    MKN
) where {T,DualT<:Hyper{T}}
    Cr = real_rep(_C)
    Br = real_rep(_B)

    @turbo for col ∈ indices((Cr, Br), 3),
        row ∈ indices((Cr, A), (2, 1)),
        comp ∈ indices((Cr, Br), 1)

        acc = zero(eltype(Cr))
        for k ∈ indices((A, Br), 2)
            acc += A[row, k] * Br[comp, k, col]
        end
        Cr[comp, row, col] = α * acc + β * Cr[comp, row, col]
    end

    return _C
end

# Single-threaded counterpart of the hyperdual-matrix × plain vector/matrix
# product: stores `α * _A * B + β * _C` into `_C`. Dense column-major `_C` and
# `_A` are reinterpreted as flat real arrays and passed to the ordinary real
# `_matmul_serial!`; any other layout goes through an explicit `@turbo` loop
# nest. `MKN` is accepted but not used here. Returns `_C`.
@eval @inline function _matmul_serial!(
    _C::$(AbstractVectorOrMatrix){DualT},
    _A::AbstractMatrix{DualT},
    B::$(AbstractVectorOrMatrix),
    α,
    β,
    MKN
) where {T,DualT<:Hyper{T}}
    flat_ok =
        Bool(ArrayInterface.is_dense(_C)) &&
        Bool(ArrayInterface.is_column_major(_C)) &&
        Bool(ArrayInterface.is_dense(_A)) &&
        Bool(ArrayInterface.is_column_major(_A))

    if !flat_ok
        # generic layout: work on the real representation with a leading
        # component dimension
        Ar = real_rep(_A)
        Cr = real_rep(_C)

        @turbo for col ∈ indices((Cr, B), (3, 2)),
            row ∈ indices((Cr, Ar), 2),
            comp ∈ indices((Cr, Ar), 1)

            acc = zero(eltype(Cr))
            for k ∈ indices((Ar, B), (3, 1))
                acc += Ar[comp, row, k] * B[k, col]
            end
            Cr[comp, row, col] = α * acc + β * Cr[comp, row, col]
        end
        return _C
    end

    # contiguous layout: no reshape needed, defer to the standard method
    Aflat = reinterpret(T, _A)
    Cflat = reinterpret(T, _C)
    _matmul_serial!(Cflat, Aflat, B, α, β, nothing)
    return _C
end

# Single-threaded product of two hyperdual arrays: stores
# `α * _A * _B + β * _C` into `_C`.
#
# Writing a = a₀ + a₁ε₁ + a₂ε₂ + a₁₂ε₁ε₂ (and likewise b), the product is
#   a*b = a₀b₀ + (a₁b₀ + a₀b₁)ε₁ + (a₂b₀ + a₀b₂)ε₂
#         + (a₁₂b₀ + a₀b₁₂ + a₁b₂ + a₂b₁)ε₁ε₂
# The real representations are indexed [component, row, col]; this assumes the
# `Hyper` field order (value, ε₁, ε₂, ε₁ε₂). The result is built in three
# passes:
#   1. every component of `A` times the value part of `B`, scaled by `α`, with
#      `β * C` folded in;
#   2. the value part of `A` times each non-value component of `B`;
#   3. the mixed first-order terms a₁b₂ + a₂b₁, which contribute only to the
#      ε₁ε₂ component.
#
# `MKN` is accepted but not used here. Returns `_C`.
@eval @inline function _matmul_serial!(
    _C::$(AbstractVectorOrMatrix){DualT},
    _A::AbstractMatrix{DualT},
    _B::$(AbstractVectorOrMatrix){DualT},
    α,
    β,
    MKN
) where {T,DualT<:Hyper{T}}
    A = real_rep(_A)
    C = real_rep(_C)
    B = real_rep(_B)

    # Pass 1: C[l, m, n] = α * Σₖ A[l, m, k] * B[1, k, n] + β * C[l, m, n]
    if Bool(ArrayInterface.is_dense(_C)) &&
       Bool(ArrayInterface.is_column_major(_C)) &&
       Bool(ArrayInterface.is_dense(_A)) &&
       Bool(ArrayInterface.is_column_major(_A))
        # we can avoid the reshape and call the standard method
        Ar = reinterpret(T, _A)
        Cr = reinterpret(T, _C)
        _matmul_serial!(Cr, Ar, _view1(B), α, β, nothing)
    else
        # we cannot use the standard method directly
        @turbo for n ∈ indices((C, B), 3),
            m ∈ indices((C, A), 2),
            l in indices((C, A), 1)

            Cₗₘₙ = zero(eltype(C))
            for k ∈ indices((A, B), (3, 2))
                Cₗₘₙ += A[l, m, k] * B[1, k, n]
            end
            C[l, m, n] = α * Cₗₘₙ + β * C[l, m, n]
        end
    end

    # Pass 2: value part of A against the ε₁, ε₂ and ε₁ε₂ parts of B
    @turbo for n ∈ indices((B, C), 3), m ∈ indices((A, C), 2), p ∈ 1:3
        Cₚₘₙ = zero(eltype(C))
        for k ∈ indices((A, B), (3, 2))
            Cₚₘₙ += A[1, m, k] * B[p+1, k, n]
        end
        C[p+1, m, n] = C[p+1, m, n] + α * Cₚₘₙ
    end

    # Pass 3: mixed terms a₁b₂ + a₂b₁ of the ε₁ε₂ coefficient. Without them
    # the fourth component of the result is wrong for any operands whose
    # first-order parts are non-zero.
    @turbo for n ∈ indices((B, C), 3), m ∈ indices((A, C), 2)
        Cₘₙ = zero(eltype(C))
        for k ∈ indices((A, B), (3, 2))
            Cₘₙ += A[2, m, k] * B[3, k, n] + A[3, m, k] * B[2, k, n]
        end
        C[4, m, n] = C[4, m, n] + α * Cₘₙ
    end

    return _C
end
end # for

end # module
3 changes: 3 additions & 0 deletions src/Octavian.jl
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ if !isdefined(Base, :get_extension)
include("../ext/ForwardDiffExt.jl")
end

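# TODO: confirm whether this file must always be included, or only when package extensions are unavailable (as is done for ForwardDiffExt above)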
include("../ext/HyperDualNumbersExt.jl")

@static if VERSION >= v"1.8.0-beta1"
@setup_workload begin
# Putting some things in `setup` can reduce the size of the
Expand Down
68 changes: 68 additions & 0 deletions test/hyperduals.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Build a hyperdual number whose value part is `x` and whose three
# infinitesimal parts are drawn independently from `rand()`.
randdual(x::Number) = HyperDualNumbers.Hyper(x, rand(), rand(), rand())

# Array method: return an array of the same size as `x` in which every entry
# is a random hyperdual built from the corresponding entry of `x`.
#
# The previous single-method version rebound its output variable inside the
# loop instead of storing into it, so an array argument yielded one scalar.
# Scalar arguments (the `randdual.(A)` broadcast used by the tests) behave
# exactly as before.
function randdual(x::AbstractArray)
    out = zeros(HyperDualNumbers.Hyper{Float64}, size(x)...)
    for i in eachindex(x)
        out[i] = randdual(x[i])
    end
    return out
end

# Reinterpret the elements of `A` as values of type `T` and return every
# fourth row of the result (rows 1, 5, 9, ...), as a copy.
#
# Per the author's explanation in the review thread below, reinterpreting an
# N×M matrix of hyperduals gives a 4N×M matrix holding the val, ε₁, ε₂ and ε₁₂
# parts of each entry, so this slice keeps only the value parts.
# NOTE(review): the ε components are therefore NOT compared by any test that
# goes through this helper — confirm whether that is intended.
function reinterpretHD(T, A)
# flat view of the underlying storage as `T`
tmp = reinterpret(T, A)
# stride-4 row slice: first component of each 4-component element
return tmp[1:4:end, :]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why the slice?

Copy link
Contributor Author

@CheukHinHoJerry CheukHinHoJerry Jun 17, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

reinterpreting a $N \times M$ matrix containing HyperDuals returns a $4N \times M$ matrix containing val, $\epsilon_{1}$, $\epsilon_{2}$ and $\epsilon_{12}$ part of each of the entries.

This is essentially checking only the val part of the matrix $A$.

I will do some updates and name the tests properly on what they are checking.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't we also want to check the epsilons?

Copy link
Contributor Author

@CheukHinHoJerry CheukHinHoJerry Jun 17, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought what you mean in #179 (comment) is that we only check val part. Sorry for misunderstanding that and I will fix it now.

Copy link
Collaborator

@chriselrod chriselrod Jun 17, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I meant that is what isapprox only checks the real part, so we need to reinterpret to check the entire thing.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I now check the entire thing and it should be ready for review. Sorry for iterating so many times.

end

# Timed test set for Octavian's matrix products with HyperDualNumbers element
# types. Results are compared against LinearAlgebra / Base multiplication.
@time @testset "HyperDualNumbers.jl" begin
# problem sizes: A is m×k, B is k×n, C is m×n
m = 53
n = 63
k = 73

A1 = rand(Float64, m, k)
B1 = rand(Float64, k, n)
C1 = rand(Float64, m, n)

# independent copies so both implementations start from identical inputs
A2 = deepcopy(A1)
B2 = deepcopy(B1)
C2 = deepcopy(C1)

α = Float64(2.0)
β = Float64(2.0)

# baseline on plain Float64 data: C = α*A*B + β*C must agree with mul!
Octavian.matmul!(C1, A1, B1, α, β)
LinearAlgebra.mul!(C2, A2, B2, α, β)
@test C1 ≈ C2
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
@test C1 C2
@test reinterpretH(C1) reinterpretHD(C2)


# hyperdual A (zero infinitesimal parts) times plain B. The transpose of A1 is
# stored so that `A1dual'` below is a lazy adjoint wrapper around the data.
# NOTE(review): presumably this is meant to exercise the non-column-major
# fallback path — confirm.
A1dual = zeros(HyperDualNumbers.Hyper{Float64}, reverse(size(A1))...)
A1dual .= A1'
C1dual = zeros(HyperDualNumbers.Hyper{Float64}, size(C1)...)

A2dual = deepcopy(A1dual)
C2dual = deepcopy(C1dual)
# `similar` leaves these uninitialised; the calls below pass no α/β
C3dual = similar(C1dual)
C4dual = similar(C2dual)
Octavian.matmul!(C1dual, A1dual', B1)
Octavian.matmul!(C2dual, A2dual', B2)
Octavian.matmul_serial!(C3dual, A1dual', B1)
Octavian.matmul_serial!(C4dual, A2dual', B2)

# plain Float64 reference product
Cref = zeros(Float64, size(C1)...)
LinearAlgebra.mul!(Cref, A1, B1)
# only the rows kept by reinterpretHD (every fourth row of the reinterpreted
# data) are compared against the reference
@test reinterpretHD(Float64, C1dual) ≈ reinterpretHD(Float64, C2dual) ≈ reinterpretHD(Float64, C3dual) ≈ reinterpretHD(Float64, C4dual) ≈ Cref


@testset "two dual arrays" begin
# both operands carry random infinitesimal parts
A1d = randdual.(A1)
B1d = randdual.(B1)
# threaded, serial and Base products (scaled by 1.3), compared via reinterpretHD
@test reinterpretHD(Float64, Octavian.matmul(A1d, B1d, 1.3)) ≈
reinterpretHD(Float64, Octavian.matmul_serial(A1d, B1d, 1.3)) ≈
reinterpretHD(Float64, (A1d * B1d) .* 1.3)
# same comparison with A given as a view that drops its last row
@test reinterpretHD(
Float64,
Octavian.matmul(@view(A1d[begin:end-1, :]), B1d)
) ≈
reinterpretHD(
Float64,
Octavian.matmul_serial(@view(A1d[begin:end-1, :]), B1d)
) ≈
reinterpretHD(Float64, @view(A1d[begin:end-1, :]) * B1d)
end
end
3 changes: 2 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import Octavian
import Aqua
import BenchmarkTools
import ForwardDiff
import HyperDualNumbers
import InteractiveUtils
import LinearAlgebra
import LoopVectorization
Expand Down Expand Up @@ -37,5 +38,5 @@ include("utils.jl")
if sizeof(Int) >= 8 || !Sys.iswindows()
include("forward_diff.jl")
end

include("hyperduals.jl")
include("aqua.jl") # run the Aqua.jl tests last