Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 11 additions & 8 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
name = "Octavian"
uuid = "6fd5a793-0b7e-452c-907f-f8bfe9c57db4"
authors = ["Chris Elrod", "Dilum Aluthge", "Mason Protter", "contributors"]
version = "0.3.22"
version = "0.3.23"

[deps]
CPUSummary = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
HyperDualNumbers = "50ceba7f-c3ee-5a84-a6e8-3ad40456ec97"
IfElse = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173"
LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
ManualMemory = "d125e4d3-2237-4719-b19c-fa641b8a4667"
Expand All @@ -16,9 +17,16 @@ StaticArrayInterface = "0d7ed370-da01-4f52-bd93-41d350b8b718"
ThreadingUtilities = "8290d209-cae3-49c0-8002-c8c24d57dab5"
VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"

[weakdeps]
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"

[extensions]
ForwardDiffExt = "ForwardDiff"

[compat]
CPUSummary = "0.1.26, 0.2.1"
ForwardDiff = "0.10"
HyperDualNumbers = "4"
IfElse = "0.1"
LoopVectorization = "0.12.86"
ManualMemory = "0.1.1"
Expand All @@ -30,13 +38,11 @@ ThreadingUtilities = "0.5"
VectorizationBase = "0.21.15"
julia = "1.6"

[extensions]
ForwardDiffExt = "ForwardDiff"

[extras]
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
HyperDualNumbers = "50ceba7f-c3ee-5a84-a6e8-3ad40456ec97"
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
Expand All @@ -45,7 +51,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"

[targets]
test = ["Aqua", "BenchmarkTools", "ForwardDiff", "InteractiveUtils", "LinearAlgebra", "LoopVectorization", "Random", "VectorizationBase", "Test"]

[weakdeps]
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
test = ["Aqua", "BenchmarkTools", "ForwardDiff", "HyperDualNumbers", "InteractiveUtils", "LinearAlgebra", "LoopVectorization", "Random", "VectorizationBase", "Test"]
249 changes: 249 additions & 0 deletions ext/HyperDualNumbersExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
module HyperDualNumbersExt

using HyperDualNumbers: Hyper
using Octavian: ArrayInterface,
@turbo, @tturbo,
One, Zero,
indices, static
import Octavian: real_rep, _matmul!, _matmul_serial!

real_rep(a::AbstractArray{DualT}) where {T,DualT<:Hyper{T}} =
reinterpret(reshape, T, a)
_view1(B::AbstractMatrix) = @view(B[1, :])
_view1(B::AbstractArray{<:Any,3}) = @view(B[1, :, :])

for AbstractVectorOrMatrix in (:AbstractVector, :AbstractMatrix)
# multiplication of dual vector/matrix by standard matrix from the left
@eval function _matmul!(
_C::$(AbstractVectorOrMatrix){DualT},
A::AbstractMatrix,
_B::$(AbstractVectorOrMatrix){DualT},
α,
β = Zero(),
nthread::Nothing = nothing,
MKN = nothing,
contig_axis = nothing
) where {T, DualT<:Hyper{T}}
B = real_rep(_B)
C = real_rep(_C)

@tturbo for n ∈ indices((C, B), 3),
m ∈ indices((C, A), (2, 1)),
l in indices((C, B), 1)

Cₗₘₙ = zero(eltype(C))
for k ∈ indices((A, B), 2)
Cₗₘₙ += A[m, k] * B[l, k, n]
end
C[l, m, n] = α * Cₗₘₙ + β * C[l, m, n]
end

_C
end

# multiplication of dual matrix by standard vector/matrix from the right
@eval @inline function _matmul!(
_C::$(AbstractVectorOrMatrix){DualT},
_A::AbstractMatrix{DualT},
B::$(AbstractVectorOrMatrix),
α = One(),
β = Zero(),
nthread::Nothing = nothing,
MKN = nothing
) where {T,DualT<:Hyper{T}}
if Bool(ArrayInterface.is_dense(_C)) &&
Bool(ArrayInterface.is_column_major(_C)) &&
Bool(ArrayInterface.is_dense(_A)) &&
Bool(ArrayInterface.is_column_major(_A))
# we can avoid the reshape and call the standard method
A = reinterpret(T, _A)
C = reinterpret(T, _C)
_matmul!(C, A, B, α, β, nthread, nothing)
else
# we cannot use the standard method directly
A = real_rep(_A)
C = real_rep(_C)

@tturbo for n ∈ indices((C, B), (3, 2)),
m ∈ indices((C, A), 2),
l in indices((C, A), 1)

Cₗₘₙ = zero(eltype(C))
for k ∈ indices((A, B), (3, 1))
Cₗₘₙ += A[l, m, k] * B[k, n]
end
C[l, m, n] = α * Cₗₘₙ + β * C[l, m, n]
end
end

_C
end

@eval @inline function _matmul!(
_C::$(AbstractVectorOrMatrix){DualT},
_A::AbstractMatrix{DualT},
_B::$(AbstractVectorOrMatrix){DualT},
α = One(),
β = Zero(),
nthread::Nothing = nothing,
MKN = nothing,
contig = nothing
) where {T,DualT<:Hyper{T}}
A = real_rep(_A)
C = real_rep(_C)
B = real_rep(_B)
if Bool(ArrayInterface.is_dense(_C)) &&
Bool(ArrayInterface.is_column_major(_C)) &&
Bool(ArrayInterface.is_dense(_A)) &&
Bool(ArrayInterface.is_column_major(_A))
# we can avoid the reshape and call the standard method
Ar = reinterpret(T, _A)
Cr = reinterpret(T, _C)
_matmul!(Cr, Ar, _view1(B), α, β, nthread, nothing)
else
# we cannot use the standard method directly
@tturbo for n ∈ indices((C, B), 3),
m ∈ indices((C, A), 2),
l in indices((C, A), 1)

Cₗₘₙ = zero(eltype(C))
for k ∈ indices((A, B), (3, 2))
Cₗₘₙ += A[l, m, k] * B[1, k, n]
end
C[l, m, n] = α * Cₗₘₙ + β * C[l, m, n]
end
end
@tturbo for n ∈ indices((B, C), 3), m ∈ indices((A, C), 2), p ∈ 1:3
Cₚₘₙ = zero(eltype(C))
for k ∈ indices((A, B), (3, 2))
Cₚₘₙ += A[1, m, k] * B[p+1, k, n]
end
C[p+1, m, n] = C[p+1, m, n] + α * Cₚₘₙ
end

@tturbo for n ∈ indices((B, C), 3), m ∈ indices((A, C), 2)
Cₘₙ = zero(eltype(C))
for k ∈ indices((A, B), (3, 2))
Cₘₙ += A[2, m, k] * B[3, k, n] + A[3, m, k] * B[2, k, n]
end
C[4, m, n] = C[4, m, n] + α * Cₘₙ
end
_C
end

# multiplication of dual vector/matrix by standard matrix from the left
@eval function _matmul_serial!(
_C::$(AbstractVectorOrMatrix){DualT},
A::AbstractMatrix,
_B::$(AbstractVectorOrMatrix){DualT},
α,
β,
MKN
) where {T, DualT<:Hyper{T}}
B = real_rep(_B)
C = real_rep(_C)

@turbo for n ∈ indices((C, B), 3),
m ∈ indices((C, A), (2, 1)),
l in indices((C, B), 1)

Cₗₘₙ = zero(eltype(C))
for k ∈ indices((A, B), 2)
Cₗₘₙ += A[m, k] * B[l, k, n]
end
C[l, m, n] = α * Cₗₘₙ + β * C[l, m, n]
end

_C
end

# multiplication of dual matrix by standard vector/matrix from the right
@eval @inline function _matmul_serial!(
_C::$(AbstractVectorOrMatrix){DualT},
_A::AbstractMatrix{DualT},
B::$(AbstractVectorOrMatrix),
α,
β,
MKN
) where {T,DualT<:Hyper{T}}
if Bool(ArrayInterface.is_dense(_C)) &&
Bool(ArrayInterface.is_column_major(_C)) &&
Bool(ArrayInterface.is_dense(_A)) &&
Bool(ArrayInterface.is_column_major(_A))
# we can avoid the reshape and call the standard method
A = reinterpret(T, _A)
C = reinterpret(T, _C)
_matmul_serial!(C, A, B, α, β, nothing)
else
# we cannot use the standard method directly
A = real_rep(_A)
C = real_rep(_C)

@turbo for n ∈ indices((C, B), (3, 2)),
m ∈ indices((C, A), 2),
l in indices((C, A), 1)

Cₗₘₙ = zero(eltype(C))
for k ∈ indices((A, B), (3, 1))
Cₗₘₙ += A[l, m, k] * B[k, n]
end
C[l, m, n] = α * Cₗₘₙ + β * C[l, m, n]
end
end

_C
end

@eval @inline function _matmul_serial!(
_C::$(AbstractVectorOrMatrix){DualT},
_A::AbstractMatrix{DualT},
_B::$(AbstractVectorOrMatrix){DualT},
α,
β,
MKN
) where {T, DualT<:Hyper{T}}
A = real_rep(_A)
C = real_rep(_C)
B = real_rep(_B)
if Bool(ArrayInterface.is_dense(_C)) &&
Bool(ArrayInterface.is_column_major(_C)) &&
Bool(ArrayInterface.is_dense(_A)) &&
Bool(ArrayInterface.is_column_major(_A))
# we can avoid the reshape and call the standard method
Ar = reinterpret(T, _A)
Cr = reinterpret(T, _C)
_matmul_serial!(Cr, Ar, _view1(B), α, β, nothing)
else
# we cannot use the standard method directly
@turbo for n ∈ indices((C, B), 3),
m ∈ indices((C, A), 2),
l in indices((C, A), 1)

Cₗₘₙ = zero(eltype(C))
for k ∈ indices((A, B), (3, 2))
Cₗₘₙ += A[l, m, k] * B[1, k, n]
end
C[l, m, n] = α * Cₗₘₙ + β * C[l, m, n]
end
end

@turbo for n ∈ indices((B, C), 3), m ∈ indices((A, C), 2), p ∈ 1:3
Cₚₘₙ = zero(eltype(C))
for k ∈ indices((A, B), (3, 2))
Cₚₘₙ += A[1, m, k] * B[p+1, k, n]
end
C[p+1, m, n] = C[p+1, m, n] + α * Cₚₘₙ
end

@tturbo for n ∈ indices((B, C), 3), m ∈ indices((A, C), 2)
Cₘₙ = zero(eltype(C))
for k ∈ indices((A, B), (3, 2))
Cₘₙ += A[2, m, k] * B[3, k, n] + A[3, m, k] * B[2, k, n]
end
C[4, m, n] = C[4, m, n] + α * Cₘₙ
end
_C
end
end # for

end # module
3 changes: 3 additions & 0 deletions src/Octavian.jl
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ if !isdefined(Base, :get_extension)
include("../ext/ForwardDiffExt.jl")
end

# TODO: confirm when we need this extension
include("../ext/HyperDualNumbersExt.jl")

@static if VERSION >= v"1.8.0-beta1"
@setup_workload begin
# Putting some things in `setup` can reduce the size of the
Expand Down
Loading