From 9e9b9ba03f6cadb959a6d35b82333887ce1d466d Mon Sep 17 00:00:00 2001 From: cheukhinhojerry Date: Thu, 15 Jun 2023 22:21:13 -0700 Subject: [PATCH 1/7] Add HyperDualNumbersExt --- ext/HyperDualNumbersExt.jl | 235 +++++++++++++++++++++++++++++++++++++ 1 file changed, 235 insertions(+) create mode 100644 ext/HyperDualNumbersExt.jl diff --git a/ext/HyperDualNumbersExt.jl b/ext/HyperDualNumbersExt.jl new file mode 100644 index 0000000..000f54c --- /dev/null +++ b/ext/HyperDualNumbersExt.jl @@ -0,0 +1,235 @@ +module HyperDualNumbersExt + +using HyperDualNumbers: Hyper + +using Octavian: ArrayInterface, + @turbo, @tturbo, + One, Zero, + indices, static +import Octavian: real_rep, _matmul!, _matmul_serial! + +real_rep(a::AbstractArray{DualT}) where {T,DualT<:Hyper{T}} = + reinterpret(reshape, T, a) +_view1(B::AbstractMatrix) = @view(B[1, :]) +_view1(B::AbstractArray{<:Any,3}) = @view(B[1, :, :]) + +for AbstractVectorOrMatrix in (:AbstractVector, :AbstractMatrix) + # multiplication of dual vector/matrix by standard matrix from the left + @eval function _matmul!( + _C::$(AbstractVectorOrMatrix){DualT}, + A::AbstractMatrix, + _B::$(AbstractVectorOrMatrix){DualT}, + α, + β = Zero(), + nthread::Nothing = nothing, + MKN = nothing, + contig_axis = nothing + ) where {DualT<:Hyper} + B = real_rep(_B) + C = real_rep(_C) + + @tturbo for n ∈ indices((C, B), 3), + m ∈ indices((C, A), (2, 1)), + l in indices((C, B), 1) + + Cₗₘₙ = zero(eltype(C)) + for k ∈ indices((A, B), 2) + Cₗₘₙ += A[m, k] * B[l, k, n] + end + C[l, m, n] = α * Cₗₘₙ + β * C[l, m, n] + end + + _C + end + + # multiplication of dual matrix by standard vector/matrix from the right + @eval @inline function _matmul!( + _C::$(AbstractVectorOrMatrix){DualT}, + _A::AbstractMatrix{DualT}, + B::$(AbstractVectorOrMatrix), + α = One(), + β = Zero(), + nthread::Nothing = nothing, + MKN = nothing + ) where {T,DualT<:Hyper{T}} + if Bool(ArrayInterface.is_dense(_C)) && + Bool(ArrayInterface.is_column_major(_C)) && + Bool(ArrayInterface.is_dense(_A)) && + Bool(ArrayInterface.is_column_major(_A)) + # we can avoid the reshape and call the standard method + A = reinterpret(T, _A) + C = reinterpret(T, _C) + _matmul!(C, A, B, α, β, nthread, nothing) + else + # we cannot use the standard method directly + A = real_rep(_A) + C = real_rep(_C) + + @tturbo for n ∈ indices((C, B), (3, 2)), + m ∈ indices((C, A), 2), + l in indices((C, A), 1) + + Cₗₘₙ = zero(eltype(C)) + for k ∈ indices((A, B), (3, 1)) + Cₗₘₙ += A[l, m, k] * B[k, n] + end + C[l, m, n] = α * Cₗₘₙ + β * C[l, m, n] + end + end + + _C + end + + @eval @inline function _matmul!( + _C::$(AbstractVectorOrMatrix){DualT}, + _A::AbstractMatrix{DualT}, + _B::$(AbstractVectorOrMatrix){DualT}, + α = One(), + β = Zero(), + nthread::Nothing = nothing, + MKN = nothing, + contig = nothing + ) where {T,DualT<:Hyper{T}} + A = real_rep(_A) + C = real_rep(_C) + B = real_rep(_B) + if Bool(ArrayInterface.is_dense(_C)) && + Bool(ArrayInterface.is_column_major(_C)) && + Bool(ArrayInterface.is_dense(_A)) && + Bool(ArrayInterface.is_column_major(_A)) + # we can avoid the reshape and call the standard method + Ar = reinterpret(T, _A) + Cr = reinterpret(T, _C) + _matmul!(Cr, Ar, _view1(B), α, β, nthread, nothing) + else + # we cannot use the standard method directly + @tturbo for n ∈ indices((C, B), 3), + m ∈ indices((C, A), 2), + l in indices((C, A), 1) + + Cₗₘₙ = zero(eltype(C)) + for k ∈ indices((A, B), (3, 2)) + Cₗₘₙ += A[l, m, k] * B[1, k, n] + end + C[l, m, n] = α * Cₗₘₙ + β * C[l, m, n] + end + end + Pstatic = static(P) + @tturbo for n ∈ indices((B, C), 3), m ∈ indices((A, C), 2), p ∈ 1:Pstatic + Cₚₘₙ = zero(eltype(C)) + for k ∈ indices((A, B), (3, 2)) + Cₚₘₙ += A[1, m, k] * B[p+1, k, n] + end + C[p+1, m, n] = C[p+1, m, n] + α * Cₚₘₙ + end + _C + end + + # multiplication of dual vector/matrix by standard matrix from the left + @eval function _matmul_serial!( + _C::$(AbstractVectorOrMatrix){DualT}, + A::AbstractMatrix, + _B::$(AbstractVectorOrMatrix){DualT}, + α, + β, + MKN + ) where {DualT<:Hyper} + B = real_rep(_B) + C = real_rep(_C) + + @turbo for n ∈ indices((C, B), 3), + m ∈ indices((C, A), (2, 1)), + l in indices((C, B), 1) + + Cₗₘₙ = zero(eltype(C)) + for k ∈ indices((A, B), 2) + Cₗₘₙ += A[m, k] * B[l, k, n] + end + C[l, m, n] = α * Cₗₘₙ + β * C[l, m, n] + end + + _C + end + +# multiplication of dual matrix by standard vector/matrix from the right + @eval @inline function _matmul_serial!( + _C::$(AbstractVectorOrMatrix){DualT}, + _A::AbstractMatrix{DualT}, + B::$(AbstractVectorOrMatrix), + α, + β, + MKN + ) where {T,DualT<:Hyper{T}} + if Bool(ArrayInterface.is_dense(_C)) && + Bool(ArrayInterface.is_column_major(_C)) && + Bool(ArrayInterface.is_dense(_A)) && + Bool(ArrayInterface.is_column_major(_A)) + # we can avoid the reshape and call the standard method + A = reinterpret(T, _A) + C = reinterpret(T, _C) + _matmul_serial!(C, A, B, α, β, nothing) + else + # we cannot use the standard method directly + A = real_rep(_A) + C = real_rep(_C) + + @turbo for n ∈ indices((C, B), (3, 2)), + m ∈ indices((C, A), 2), + l in indices((C, A), 1) + + Cₗₘₙ = zero(eltype(C)) + for k ∈ indices((A, B), (3, 1)) + Cₗₘₙ += A[l, m, k] * B[k, n] + end + C[l, m, n] = α * Cₗₘₙ + β * C[l, m, n] + end + end + + _C + end + + @eval @inline function _matmul_serial!( + _C::$(AbstractVectorOrMatrix){DualT}, + _A::AbstractMatrix{DualT}, + _B::$(AbstractVectorOrMatrix){DualT}, + α, + β, + MKN + ) where {DualT<:Hyper} + A = real_rep(_A) + C = real_rep(_C) + B = real_rep(_B) + if Bool(ArrayInterface.is_dense(_C)) && + Bool(ArrayInterface.is_column_major(_C)) && + Bool(ArrayInterface.is_dense(_A)) && + Bool(ArrayInterface.is_column_major(_A)) + # we can avoid the reshape and call the standard method + Ar = reinterpret(T, _A) + Cr = reinterpret(T, _C) + _matmul_serial!(Cr, Ar, _view1(B), α, β, nothing) + else + # we cannot use the standard method directly + @turbo for n ∈ indices((C, B), 3), + m ∈ indices((C, A), 2), + l in indices((C, A), 1) + + Cₗₘₙ = zero(eltype(C)) + for k ∈ indices((A, B), (3, 2)) + Cₗₘₙ += A[l, m, k] * B[1, k, n] + end + C[l, m, n] = α * Cₗₘₙ + β * C[l, m, n] + end + end + Pstatic = static(P) + @turbo for n ∈ indices((B, C), 3), m ∈ indices((A, C), 2), p ∈ 1:Pstatic + Cₚₘₙ = zero(eltype(C)) + for k ∈ indices((A, B), (3, 2)) + Cₚₘₙ += A[1, m, k] * B[p+1, k, n] + end + C[p+1, m, n] = C[p+1, m, n] + α * Cₚₘₙ + end + _C + end +end # for + +end # module \ No newline at end of file From eec3bba5c5269aa2282b47237c3ea63f1bb7046a Mon Sep 17 00:00:00 2001 From: cheukhinhojerry Date: Fri, 16 Jun 2023 12:41:17 -0700 Subject: [PATCH 2/7] add test and dependency --- Project.toml | 17 ++++++---- ext/HyperDualNumbersExt.jl | 14 ++++---- src/Octavian.jl | 3 ++ test/hyperduals.jl | 66 ++++++++++++++++++++++++++++++++++++++ test/runtests.jl | 3 +- 5 files changed, 87 insertions(+), 16 deletions(-) create mode 100644 test/hyperduals.jl diff --git a/Project.toml b/Project.toml index 586a097..f301863 100644 --- a/Project.toml +++ b/Project.toml @@ -6,6 +6,7 @@ version = "0.3.22" [deps] CPUSummary = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" +HyperDualNumbers = "50ceba7f-c3ee-5a84-a6e8-3ad40456ec97" IfElse = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173" LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890" ManualMemory = "d125e4d3-2237-4719-b19c-fa641b8a4667" @@ -16,9 +17,16 @@ StaticArrayInterface = "0d7ed370-da01-4f52-bd93-41d350b8b718" ThreadingUtilities = "8290d209-cae3-49c0-8002-c8c24d57dab5" VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f" +[weakdeps] +ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" + +[extensions] +ForwardDiffExt = "ForwardDiff" + [compat] CPUSummary = "0.1.26, 0.2.1" ForwardDiff = "0.10" +HyperDualNumbers = "4" IfElse = "0.1" LoopVectorization = "0.12.86" ManualMemory = "0.1.1" @@ -30,13 +38,11 @@ ThreadingUtilities = "0.5" VectorizationBase = "0.21.15" julia = "1.6" -[extensions] -ForwardDiffExt = "ForwardDiff" - [extras] Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" +HyperDualNumbers = "50ceba7f-c3ee-5a84-a6e8-3ad40456ec97" InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890" @@ -45,7 +51,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f" [targets] -test = ["Aqua", "BenchmarkTools", "ForwardDiff", "InteractiveUtils", "LinearAlgebra", "LoopVectorization", "Random", "VectorizationBase", "Test"] - -[weakdeps] -ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" +test = ["Aqua", "BenchmarkTools", "ForwardDiff", "HyperDualNumbers", "InteractiveUtils", "LinearAlgebra", "LoopVectorization", "Random", "VectorizationBase", "Test"] diff --git a/ext/HyperDualNumbersExt.jl b/ext/HyperDualNumbersExt.jl index 000f54c..b086d94 100644 --- a/ext/HyperDualNumbersExt.jl +++ b/ext/HyperDualNumbersExt.jl @@ -1,7 +1,6 @@ module HyperDualNumbersExt using HyperDualNumbers: Hyper - using Octavian: ArrayInterface, @turbo, @tturbo, One, Zero, @@ -24,7 +23,7 @@ for AbstractVectorOrMatrix in (:AbstractVector, :AbstractMatrix) nthread::Nothing = nothing, MKN = nothing, contig_axis = nothing - ) where {DualT<:Hyper} + ) where {T, DualT<:Hyper{T}} B = real_rep(_B) C = real_rep(_C) @@ -114,8 +113,7 @@ for AbstractVectorOrMatrix in (:AbstractVector, :AbstractMatrix) C[l, m, n] = α * Cₗₘₙ + β * C[l, m, n] end end - Pstatic = static(P) - @tturbo for n ∈ indices((B, C), 3), m ∈ indices((A, C), 2), p ∈ 1:Pstatic + @tturbo for n ∈ indices((B, C), 3), m ∈ indices((A, C), 2), p ∈ 1:3 Cₚₘₙ = zero(eltype(C)) for k ∈ indices((A, B), (3, 2)) Cₚₘₙ += A[1, m, k] * B[p+1, k, n] @@ -133,7 +131,7 @@ for AbstractVectorOrMatrix in (:AbstractVector, :AbstractMatrix) α, β, MKN - ) where {DualT<:Hyper} + ) where {T, DualT<:Hyper{T}} B = real_rep(_B) C = real_rep(_C) @@ -195,7 +193,7 @@ for AbstractVectorOrMatrix in (:AbstractVector, :AbstractMatrix) α, β, MKN - ) where {DualT<:Hyper} + ) where {T, DualT<:Hyper{T}} A = real_rep(_A) C = real_rep(_C) B = real_rep(_B) @@ -220,8 +218,8 @@ for AbstractVectorOrMatrix in (:AbstractVector, :AbstractMatrix) C[l, m, n] = α * Cₗₘₙ + β * C[l, m, n] end end - Pstatic = static(P) - @turbo for n ∈ indices((B, C), 3), m ∈ indices((A, C), 2), p ∈ 1:Pstatic + + @turbo for n ∈ indices((B, C), 3), m ∈ indices((A, C), 2), p ∈ 1:3 Cₚₘₙ = zero(eltype(C)) for k ∈ indices((A, B), (3, 2)) Cₚₘₙ += A[1, m, k] * B[p+1, k, n] diff --git a/src/Octavian.jl b/src/Octavian.jl index 105633e..6539178 100644 --- a/src/Octavian.jl +++ b/src/Octavian.jl @@ -73,6 +73,9 @@ if !isdefined(Base, :get_extension) include("../ext/ForwardDiffExt.jl") end +# TODO: confirm when we need this extension +include("../ext/HyperDualNumbersExt.jl") + @static if VERSION >= v"1.8.0-beta1" @setup_workload begin # Putting some things in `setup` can reduce the size of the diff --git a/test/hyperduals.jl b/test/hyperduals.jl new file mode 100644 index 0000000..3b127f1 --- /dev/null +++ b/test/hyperduals.jl @@ -0,0 +1,66 @@ +function randdual(x) + _x = zeros(HyperDualNumbers.Hyper{Float64}, size(x)...) + for i in eachindex(x) + _x = HyperDualNumbers.Hyper(x[i], rand(), rand(), rand()) + end + return _x +end + +function reinterpretHD(T, A) + tmp = reinterpret(T, A) + return tmp[1:4:end, :] +end + +@time @testset "HyperDualNumbers.jl" begin + m = 53 + n = 63 + k = 73 + + A1 = rand(Float64, m, k) + B1 = rand(Float64, k, n) + C1 = rand(Float64, m, n) + + A2 = deepcopy(A1) + B2 = deepcopy(B1) + C2 = deepcopy(C1) + + α = Float64(2.0) + β = Float64(2.0) + + Octavian.matmul!(C1, A1, B1, α, β) + LinearAlgebra.mul!(C2, A2, B2, α, β) + @test C1 ≈ C2 + + A1dual = zeros(HyperDualNumbers.Hyper{Float64}, reverse(size(A1))...) + A1dual .= A1' + C1dual = zeros(HyperDualNumbers.Hyper{Float64}, size(C1)...) + + A2dual = deepcopy(A1dual) + C2dual = deepcopy(C1dual) + C3dual = similar(C1dual) + C4dual = similar(C2dual) + Octavian.matmul!(C1dual, A1dual', B1) + Octavian.matmul!(C2dual, A2dual', B2) + Octavian.matmul_serial!(C3dual, A1dual', B1) + Octavian.matmul_serial!(C4dual, A2dual', B2) + + @test reinterpretHD(Float64, C1dual) ≈ reinterpretHD(Float64, C2dual) ≈ reinterpretHD(Float64, C3dual) ≈ reinterpretHD(Float64, C4dual) + + + @testset "two dual arrays" begin + A1d = randdual.(A1) + B1d = randdual.(B1) + @test reinterpretHD(Float64, Octavian.matmul(A1d, B1d, 1.3)) ≈ + reinterpretHD(Float64, Octavian.matmul_serial(A1d, B1d, 1.3)) ≈ + reinterpretHD(Float64, (A1d * B1d) .* 1.3) + @test reinterpretHD( + Float64, + Octavian.matmul(@view(A1d[begin:end-1, :]), B1d) + ) ≈ + reinterpretHD( + Float64, + Octavian.matmul_serial(@view(A1d[begin:end-1, :]), B1d) + ) ≈ + reinterpretHD(Float64, @view(A1d[begin:end-1, :]) * B1d) + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 05baff2..5bb3e1e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -9,6 +9,7 @@ import Octavian import Aqua import BenchmarkTools import ForwardDiff +import HyperDualNumbers import InteractiveUtils import LinearAlgebra import LoopVectorization @@ -37,5 +38,5 @@ include("utils.jl") if sizeof(Int) >= 8 || !Sys.iswindows() include("forward_diff.jl") end - +include("hyperduals.jl") include("aqua.jl") # run the Aqua.jl tests last From 2d4b743c2d80a56ef276c8b90fc46735bcea732b Mon Sep 17 00:00:00 2001 From: cheukhinhojerry Date: Fri, 16 Jun 2023 13:07:53 -0700 Subject: [PATCH 3/7] minor fix on test --- test/hyperduals.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/hyperduals.jl b/test/hyperduals.jl index 3b127f1..b32a32e 100644 --- a/test/hyperduals.jl +++ b/test/hyperduals.jl @@ -44,7 +44,9 @@ end Octavian.matmul_serial!(C3dual, A1dual', B1) Octavian.matmul_serial!(C4dual, A2dual', B2) - @test reinterpretHD(Float64, C1dual) ≈ reinterpretHD(Float64, C2dual) ≈ reinterpretHD(Float64, C3dual) ≈ reinterpretHD(Float64, C4dual) + Cref = zeros(Float64, size(C1)...) + LinearAlgebra.mul!(Cref, A1, B1) + @test reinterpretHD(Float64, C1dual) ≈ reinterpretHD(Float64, C2dual) ≈ reinterpretHD(Float64, C3dual) ≈ reinterpretHD(Float64, C4dual) ≈ Cref @testset "two dual arrays" begin From ae5c086658267e9ad31e8c3fd54d35d22fdc5b4d Mon Sep 17 00:00:00 2001 From: cheukhinhojerry Date: Fri, 16 Jun 2023 20:25:47 -0700 Subject: [PATCH 4/7] tests fix with proper testsets --- test/hyperduals.jl | 77 +++++++++++++++++++++++++++++++++------------- 1 file changed, 55 insertions(+), 22 deletions(-) diff --git a/test/hyperduals.jl b/test/hyperduals.jl index b32a32e..bdf0d23 100644 --- a/test/hyperduals.jl +++ b/test/hyperduals.jl @@ -20,36 +20,69 @@ end B1 = rand(Float64, k, n) C1 = rand(Float64, m, n) - A2 = deepcopy(A1) - B2 = deepcopy(B1) - C2 = deepcopy(C1) - α = Float64(2.0) β = Float64(2.0) - Octavian.matmul!(C1, A1, B1, α, β) - LinearAlgebra.mul!(C2, A2, B2, α, β) - @test C1 ≈ C2 + @testset "real array from the right" begin + A1dual = zeros(HyperDualNumbers.Hyper{Float64}, size(A1)...) + C1dual = zeros(HyperDualNumbers.Hyper{Float64}, size(C1)...) + + A1dual .= A1 + C1dual .= C1 + + A2dual = deepcopy(A1dual) + B2 = deepcopy(B1) + C2dual = deepcopy(C1dual) + + Octavian.matmul!(C1dual, A1dual, B1, α, β) + LinearAlgebra.mul!(C2dual, A2dual, B2, α, β) + @test reinterpretHD(Float64, C1dual) ≈ reinterpretHD(Float64, C2dual) + end + + @testset "real array from the left" begin + B1dual = zeros(HyperDualNumbers.Hyper{Float64}, size(B1)...) + C1dual = zeros(HyperDualNumbers.Hyper{Float64}, size(C1)...) + + B1dual .= B1 + C1dual .= C1 - A1dual = zeros(HyperDualNumbers.Hyper{Float64}, reverse(size(A1))...) - A1dual .= A1' - C1dual = zeros(HyperDualNumbers.Hyper{Float64}, size(C1)...) + A2 = deepcopy(A1) + B2dual = deepcopy(B1dual) + C2dual = deepcopy(C1dual) - A2dual = deepcopy(A1dual) - C2dual = deepcopy(C1dual) - C3dual = similar(C1dual) - C4dual = similar(C2dual) - Octavian.matmul!(C1dual, A1dual', B1) - Octavian.matmul!(C2dual, A2dual', B2) - Octavian.matmul_serial!(C3dual, A1dual', B1) - Octavian.matmul_serial!(C4dual, A2dual', B2) + α = Float64(2.0) + β = Float64(2.0) - Cref = zeros(Float64, size(C1)...) - LinearAlgebra.mul!(Cref, A1, B1) - @test reinterpretHD(Float64, C1dual) ≈ reinterpretHD(Float64, C2dual) ≈ reinterpretHD(Float64, C3dual) ≈ reinterpretHD(Float64, C4dual) ≈ Cref + Octavian.matmul!(C1dual, A1, B1dual, α, β) + LinearAlgebra.mul!(C2dual, A2, B2dual, α, β) + @test reinterpretHD(Float64, C1dual) ≈ reinterpretHD(Float64, C2dual) + end + + + @testset "transposed arrays" begin + + A1dual = zeros(HyperDualNumbers.Hyper{Float64}, reverse(size(A1))...) + A1dual .= A1' + C1dual = zeros(HyperDualNumbers.Hyper{Float64}, size(C1)...) + + A2dual = deepcopy(A1dual) + B2 = deepcopy(B1) + C2dual = deepcopy(C1dual) + + C3dual = similar(C1dual) + C4dual = similar(C2dual) + Octavian.matmul!(C1dual, A1dual', B1) + Octavian.matmul!(C2dual, A2dual', B2) + Octavian.matmul_serial!(C3dual, A1dual', B1) + Octavian.matmul_serial!(C4dual, A2dual', B2) + + Cref = zeros(Float64, size(C1)...) + LinearAlgebra.mul!(Cref, A1, B1) + @test reinterpretHD(Float64, C1dual) ≈ reinterpretHD(Float64, C2dual) ≈ reinterpretHD(Float64, C3dual) ≈ reinterpretHD(Float64, C4dual) ≈ Cref + end - @testset "two dual arrays" begin + @testset "two dual arrays" begin A1d = randdual.(A1) B1d = randdual.(B1) @test reinterpretHD(Float64, Octavian.matmul(A1d, B1d, 1.3)) ≈ From abcbfbf53d77a44fae00893faf42189722212f5b Mon Sep 17 00:00:00 2001 From: cheukhinhojerry Date: Sat, 17 Jun 2023 01:48:03 -0700 Subject: [PATCH 5/7] fix hyperduals ext --- ext/HyperDualNumbersExt.jl | 16 +++++++++++++ test/hyperduals.jl | 47 +++++++++++++++++--------------------- 2 files changed, 37 insertions(+), 26 deletions(-) diff --git a/ext/HyperDualNumbersExt.jl b/ext/HyperDualNumbersExt.jl index b086d94..0af5650 100644 --- a/ext/HyperDualNumbersExt.jl +++ b/ext/HyperDualNumbersExt.jl @@ -120,6 +120,14 @@ for AbstractVectorOrMatrix in (:AbstractVector, :AbstractMatrix) end C[p+1, m, n] = C[p+1, m, n] + α * Cₚₘₙ end + + @tturbo for n ∈ indices((B, C), 3), m ∈ indices((A, C), 2) + Cₘₙ = zero(eltype(C)) + for k ∈ indices((A, B), (3, 2)) + Cₘₙ += A[2, m, k] * B[3, k, n] + A[3, m, k] * B[2, k, n] + end + C[4, m, n] = C[4, m, n] + α * Cₘₙ + end _C end @@ -226,6 +234,14 @@ for AbstractVectorOrMatrix in (:AbstractVector, :AbstractMatrix) end C[p+1, m, n] = C[p+1, m, n] + α * Cₚₘₙ end + + @tturbo for n ∈ indices((B, C), 3), m ∈ indices((A, C), 2) + Cₘₙ = zero(eltype(C)) + for k ∈ indices((A, B), (3, 2)) + Cₘₙ += A[2, m, k] * B[3, k, n] + A[3, m, k] * B[2, k, n] + end + C[4, m, n] = C[4, m, n] + α * Cₘₙ + end _C end end # for diff --git a/test/hyperduals.jl b/test/hyperduals.jl index bdf0d23..2302a57 100644 --- a/test/hyperduals.jl +++ b/test/hyperduals.jl @@ -1,7 +1,7 @@ function randdual(x) _x = zeros(HyperDualNumbers.Hyper{Float64}, size(x)...) for i in eachindex(x) - _x = HyperDualNumbers.Hyper(x[i], rand(), rand(), rand()) + _x[i] = HyperDualNumbers.Hyper(x[i], randn(), randn(), randn()) end return _x end @@ -24,11 +24,8 @@ end β = Float64(2.0) @testset "real array from the right" begin - A1dual = zeros(HyperDualNumbers.Hyper{Float64}, size(A1)...) - C1dual = zeros(HyperDualNumbers.Hyper{Float64}, size(C1)...) - - A1dual .= A1 - C1dual .= C1 + A1dual = randdual(A1) + C1dual = randdual(C1) A2dual = deepcopy(A1dual) B2 = deepcopy(B1) @@ -36,15 +33,12 @@ end Octavian.matmul!(C1dual, A1dual, B1, α, β) LinearAlgebra.mul!(C2dual, A2dual, B2, α, β) - @test reinterpretHD(Float64, C1dual) ≈ reinterpretHD(Float64, C2dual) + @test reinterpret(Float64, C1dual) ≈ reinterpret(Float64, C2dual) end @testset "real array from the left" begin - B1dual = zeros(HyperDualNumbers.Hyper{Float64}, size(B1)...) - C1dual = zeros(HyperDualNumbers.Hyper{Float64}, size(C1)...) - - B1dual .= B1 - C1dual .= C1 + B1dual = randdual(B1) + C1dual = randdual(C1) A2 = deepcopy(A1) B2dual = deepcopy(B1dual) @@ -55,15 +49,13 @@ end Octavian.matmul!(C1dual, A1, B1dual, α, β) LinearAlgebra.mul!(C2dual, A2, B2dual, α, β) - @test reinterpretHD(Float64, C1dual) ≈ reinterpretHD(Float64, C2dual) + @test reinterpret(Float64, C1dual) ≈ reinterpret(Float64, C2dual) end @testset "transposed arrays" begin - - A1dual = zeros(HyperDualNumbers.Hyper{Float64}, reverse(size(A1))...) - A1dual .= A1' - C1dual = zeros(HyperDualNumbers.Hyper{Float64}, size(C1)...) + A1dual = randdual(A1') + C1dual = randdual(C1) A2dual = deepcopy(A1dual) @@ -79,23 +71,26 @@ end Cref = zeros(Float64, size(C1)...) LinearAlgebra.mul!(Cref, A1, B1) - @test reinterpretHD(Float64, C1dual) ≈ reinterpretHD(Float64, C2dual) ≈ reinterpretHD(Float64, C3dual) ≈ reinterpretHD(Float64, C4dual) ≈ Cref + @test (reinterpretHD(Float64, C1dual) ≈ reinterpretHD(Float64, C2dual) ≈ + reinterpretHD(Float64, C3dual) ≈ reinterpretHD(Float64, C4dual) ≈ Cref) && + (reinterpret(Float64, C1dual) ≈ reinterpret(Float64, C2dual) ≈ + reinterpret(Float64, C3dual) ≈ reinterpret(Float64, C4dual) ) end @testset "two dual arrays" begin - A1d = randdual.(A1) - B1d = randdual.(B1) - @test reinterpretHD(Float64, Octavian.matmul(A1d, B1d, 1.3)) ≈ - reinterpretHD(Float64, Octavian.matmul_serial(A1d, B1d, 1.3)) ≈ - reinterpretHD(Float64, (A1d * B1d) .* 1.3) - @test reinterpretHD( + A1d = randdual(A1) + B1d = randdual(B1) + @test reinterpret(Float64, Octavian.matmul(A1d, B1d, 1.3)) ≈ + reinterpret(Float64, Octavian.matmul_serial(A1d, B1d, 1.3)) ≈ + reinterpret(Float64, (A1d * B1d) .* 1.3) + @test reinterpret( Float64, Octavian.matmul(@view(A1d[begin:end-1, :]), B1d) ) ≈ - reinterpretHD( + reinterpret( Float64, Octavian.matmul_serial(@view(A1d[begin:end-1, :]), B1d) ) ≈ - reinterpretHD(Float64, @view(A1d[begin:end-1, :]) * B1d) + reinterpret(Float64, @view(A1d[begin:end-1, :]) * B1d) end end From 61811b0758f346e040b9d2450ceda682d94b7f2e Mon Sep 17 00:00:00 2001 From: cheukhinhojerry Date: Sat, 17 Jun 2023 19:27:37 -0700 Subject: [PATCH 6/7] bump version --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index f301863..9054165 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Octavian" uuid = "6fd5a793-0b7e-452c-907f-f8bfe9c57db4" authors = ["Chris Elrod", "Dilum Aluthge", "Mason Protter", "contributors"] -version = "0.3.22" +version = "0.3.23" [deps] CPUSummary = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9" From 1a10ea435fccda07a2716d0d0d229b67c0df88a3 Mon Sep 17 00:00:00 2001 From: cheukhinhojerry Date: Sat, 17 Jun 2023 19:32:51 -0700 Subject: [PATCH 7/7] some format fix --- test/hyperduals.jl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/test/hyperduals.jl b/test/hyperduals.jl index 2302a57..0891b4a 100644 --- a/test/hyperduals.jl +++ b/test/hyperduals.jl @@ -44,9 +44,6 @@ end B2dual = deepcopy(B1dual) C2dual = deepcopy(C1dual) - α = Float64(2.0) - β = Float64(2.0) - Octavian.matmul!(C1dual, A1, B1dual, α, β) LinearAlgebra.mul!(C2dual, A2, B2dual, α, β) @test reinterpret(Float64, C1dual) ≈ reinterpret(Float64, C2dual) @@ -57,7 +54,6 @@ end A1dual = randdual(A1') C1dual = randdual(C1) - A2dual = deepcopy(A1dual) B2 = deepcopy(B1) C2dual = deepcopy(C1dual)