Merged
Changes from 3 commits (41 commits in total):
5ea6a33  make bias optional (Sep 27, 2019)
9f2ac8f  ditto remaining conv layers (Sep 27, 2019)
a801fcb  docstrings (Sep 27, 2019)
dced8c0  use ZeroType (Oct 1, 2019)
1fe3217  add to docs (Oct 1, 2019)
55ef7c1  add weight and bias kwargs (Oct 5, 2019)
48a305b  ditto remaining layers (Oct 5, 2019)
e97d61f  fixes (Oct 5, 2019)
d00f833  rm ZeroType (Oct 5, 2019)
2ae3ad3  doc fixes (Oct 5, 2019)
214f71f  add N (Oct 5, 2019)
a1e826b  fixes (Oct 5, 2019)
f3904b4  add ZeroType back (Oct 8, 2019)
040697f  add bias and weight kwarg (Oct 8, 2019)
b596faa  tests bias switch (Oct 8, 2019)
95c5845  document bias switch (Oct 8, 2019)
49ea43e  ZeroType => Zeros (Oct 8, 2019)
c85bad4  replace weight with filter (Oct 8, 2019)
4a183ae  make Zeros a dimensionlesss number (Oct 22, 2019)
7c90fb4  use array to define Zeros (Oct 23, 2019)
a4a987f  hook into bcasting (Nov 7, 2019)
e89b8eb  fixes (Nov 12, 2019)
eb41715  define manual rules (Nov 19, 2019)
2455630  cleaner API (Nov 27, 2019)
ec872bb  test that bias has no grads with Zeros (Nov 27, 2019)
f39e184  rm Zeros warning (Dec 9, 2019)
894c075  rm Zeros setindex (Dec 9, 2019)
a72ca2b  fix args (Dec 9, 2019)
b9fbee1  ::typeof(op) -> op (Jan 31, 2020)
bc20103  no-op copy (Jan 31, 2020)
f889d0c  add kwarg constructors (Feb 26, 2020)
58211e3  docs improve (Feb 26, 2020)
cd93179  more docs and constructors (Feb 26, 2020)
cf82393  type signatures (Feb 26, 2020)
20e78e2  docs fix (Feb 26, 2020)
7e308e7  rm unneccesary fns (Mar 4, 2020)
d8e44fc  correct broadcasting for addition (Mar 4, 2020)
5086c0f  merge conflicts (Apr 29, 2020)
534809a  move zeros to its own file (Apr 29, 2020)
29215fa  comment on possible future deprecations (Apr 29, 2020)
8f877f2  quick fix (MikeInnes, May 1, 2020)
77 changes: 49 additions & 28 deletions src/layers/conv.jl
@@ -21,7 +21,7 @@ Data should be stored in WHCN order (width, height, # channels, # batches).
In other words, a 100×100 RGB image would be a `100×100×3×1` array,
and a batch of 50 would be a `100×100×3×50` array.

-Takes the keyword arguments `pad`, `stride` and `dilation`.
+Takes the keyword arguments `use_bias`, `pad`, `stride` and `dilation`.
"""
struct Conv{N,M,F,A,V}
σ::F
@@ -30,29 +30,34 @@ struct Conv{N,M,F,A,V}
stride::NTuple{N,Int}
pad::NTuple{M,Int}
dilation::NTuple{N,Int}
+use_bias::Bool
end

function Conv(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;
-stride = 1, pad = 0, dilation = 1) where {T,N}
+stride = 1, pad = 0, dilation = 1, use_bias = true) where {T,N}
Review comment (Member):
It is weird, though, that when calling this constructor with use_bias = false one still has to pass a bias vector b. I would suggest the following non-breaking change instead of the use_bias flag:

  • relax the signature to Conv(w::AbstractArray{T,N}, b::Union{Nothing,AbstractVector{T}}, σ = identity; ...)
  • and have a call to Conv(w, nothing) construct a Conv layer with no bias.

Reply (Member Author):
Should be able to support Conv(w, nothing) and make it a bit more extensible now, I think.
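
A minimal sketch of the suggested pattern (hypothetical stand-alone names, not code from this diff; the point is the dispatch on Union{Nothing,AbstractVector}):

# Hypothetical illustration of the reviewer's suggestion: accept `nothing`
# for the bias and branch by dispatch, so no extra flag is stored.
struct ToyConv{F,A,V}
  σ::F
  weight::A
  bias::V                      # an AbstractVector, or `nothing` for no bias
end

ToyConv(w::AbstractArray, b::Union{Nothing,AbstractVector}, σ = identity) =
  ToyConv(σ, w, b)

add_bias(y, b::AbstractVector) = y .+ reshape(b, 1, 1, :, 1)
add_bias(y, ::Nothing) = y     # no-op for a layer built as ToyConv(w, nothing)

layer = ToyConv(rand(Float32, 3, 3, 1, 8), nothing)  # bias-free layer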

stride = expand(Val(N-2), stride)
pad = expand(Val(2*(N-2)), pad)
dilation = expand(Val(N-2), dilation)
-return Conv(σ, w, b, stride, pad, dilation)
+return Conv(σ, w, b, stride, pad, dilation, use_bias)
end

Conv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity;
-init = glorot_uniform, stride = 1, pad = 0, dilation = 1) where N =
+init = glorot_uniform, stride = 1, pad = 0, dilation = 1, use_bias = true) where N =
Conv(init(k..., ch...), zeros(ch[2]), σ,
-stride = stride, pad = pad, dilation = dilation)
+stride = stride, pad = pad, dilation = dilation, use_bias = use_bias)
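
With the kwarg threaded through, disabling the bias at construction time is one keyword. A usage sketch against this revision of the PR (later commits rework this API, so `use_bias` is specific to these commits):

using Flux
m = Conv((3, 3), 1 => 16, relu, use_bias = false)  # convolution without a bias term
x = rand(Float32, 28, 28, 1, 8)
size(m(x))  # (26, 26, 16, 8)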

@functor Conv

function (c::Conv)(x::AbstractArray)
# TODO: breaks gpu broadcast :(
# ndims(x) == ndims(c.weight)-1 && return squeezebatch(c(reshape(x, size(x)..., 1)))
-σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1)
cdims = DenseConvDims(x, c.weight; stride=c.stride, padding=c.pad, dilation=c.dilation)
-σ.(conv(x, c.weight, cdims) .+ b)
+if c.use_bias
+  σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1)
+  σ.(conv(x, c.weight, cdims) .+ b)
+else
+  c.σ.(conv(x, c.weight, cdims))
+end
end
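
The `reshape` above is what lines the per-channel bias up with a WHCN feature map: it produces a `1×1×C×1` array (one leading 1 per spatial dimension) that broadcasts over width, height and batch. A self-contained Base-Julia sketch for the 2-d case:

b  = Float32[1, 2, 3]            # one bias per output channel, C = 3
b4 = reshape(b, 1, 1, :, 1)      # what map(_->1, c.stride)..., :, 1 yields when N = 2
y  = zeros(Float32, 5, 5, 3, 2)  # a W×H×C×N feature map
out = y .+ b4                    # adds bias k to every pixel of channel k
size(out)                        # (5, 5, 3, 2)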

function Base.show(io::IO, l::Conv)
@@ -78,7 +83,7 @@ Standard convolutional transpose layer. `size` should be a tuple like `(2, 2)`.
Data should be stored in WHCN order. In other words, a 100×100 RGB image would
be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array.

-Takes the keyword arguments `pad`, `stride` and `dilation`.
+Takes the keyword arguments `use_bias`, `pad`, `stride` and `dilation`.
"""
struct ConvTranspose{N,M,F,A,V}
σ::F
@@ -87,20 +92,21 @@ struct ConvTranspose{N,M,F,A,V}
stride::NTuple{N,Int}
pad::NTuple{M,Int}
dilation::NTuple{N,Int}
+use_bias::Bool
end

function ConvTranspose(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;
-stride = 1, pad = 0, dilation = 1) where {T,N}
+stride = 1, pad = 0, dilation = 1, use_bias = true) where {T,N}
stride = expand(Val(N-2), stride)
pad = expand(Val(2*(N-2)), pad)
dilation = expand(Val(N-2), dilation)
-return ConvTranspose(σ, w, b, stride, pad, dilation)
+return ConvTranspose(σ, w, b, stride, pad, dilation, use_bias)
end

ConvTranspose(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity;
-init = glorot_uniform, stride = 1, pad = 0, dilation = 1) where N =
+init = glorot_uniform, stride = 1, pad = 0, dilation = 1, use_bias = true) where N =
ConvTranspose(init(k..., reverse(ch)...), zeros(ch[2]), σ,
-stride = stride, pad = pad, dilation = dilation)
+stride = stride, pad = pad, dilation = dilation, use_bias = use_bias)

@functor ConvTranspose

@@ -120,9 +126,13 @@ end

function (c::ConvTranspose)(x::AbstractArray)
# ndims(x) == ndims(c.weight)-1 && return squeezebatch(c(reshape(x, size(x)..., 1)))
-σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1)
cdims = conv_transpose_dims(c, x)
-return σ.(∇conv_data(x, c.weight, cdims) .+ b)
+if c.use_bias
+  σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1)
+  σ.(∇conv_data(x, c.weight, cdims) .+ b)
+else
+  c.σ.(∇conv_data(x, c.weight, cdims))
+end
end
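
Note that the forward pass of ConvTranspose goes through `∇conv_data`, the adjoint of `conv` with respect to its input, which is what grows the spatial dimensions. A usage sketch (again assuming this revision's `use_bias` kwarg):

using Flux
up = ConvTranspose((2, 2), 3 => 1, stride = 2, use_bias = false)
x = rand(Float32, 14, 14, 3, 1)
size(up(x))  # (28, 28, 1, 1): (14 - 1) * 2 + 2 = 28 per spatial dim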

function Base.show(io::IO, l::ConvTranspose)
@@ -148,7 +158,7 @@ Note that `out` must be an integer multiple of `in`.
Data should be stored in WHCN order. In other words, a 100×100 RGB image would
be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array.

-Takes the keyword arguments `pad`, `stride` and `dilation`.
+Takes the keyword arguments `use_bias`, `pad`, `stride` and `dilation`.
"""
struct DepthwiseConv{N,M,F,A,V}
σ::F
@@ -157,35 +167,41 @@ struct DepthwiseConv{N,M,F,A,V}
stride::NTuple{N,Int}
pad::NTuple{M,Int}
dilation::NTuple{N,Int}
+use_bias::Bool
end

function DepthwiseConv(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;
-stride = 1, pad = 0, dilation = 1) where {T,N}
+stride = 1, pad = 0, dilation = 1, use_bias = true) where {T,N}
stride = expand(Val(N-2), stride)
pad = expand(Val(2*(N-2)), pad)
dilation = expand(Val(N-2), dilation)
-return DepthwiseConv(σ, w, b, stride, pad, dilation)
+return DepthwiseConv(σ, w, b, stride, pad, dilation, use_bias)
end

function DepthwiseConv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity;
-init = glorot_uniform, stride = 1, pad = 0, dilation = 1) where N
+init = glorot_uniform, stride = 1, pad = 0, dilation = 1, use_bias = true) where N
@assert ch[2] % ch[1] == 0 "Output channels must be integer multiple of input channels"
return DepthwiseConv(
init(k..., div(ch[2], ch[1]), ch[1]),
zeros(ch[2]),
σ;
stride = stride,
pad = pad,
-dilation = dilation
+dilation = dilation,
+use_bias = use_bias
)
end
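
The assertion enforces the depthwise constraint: each input channel gets its own `out ÷ in` filters, so `out` must be an integer multiple of `in`. A usage sketch (assuming this revision's `use_bias` kwarg):

using Flux
dw = DepthwiseConv((3, 3), 4 => 8, use_bias = false)  # channel multiplier 2
x = rand(Float32, 10, 10, 4, 1)
size(dw(x))  # (8, 8, 8, 1)
# DepthwiseConv((3, 3), 4 => 6) would trip the assertion: 6 is not a multiple of 4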

@functor DepthwiseConv

function (c::DepthwiseConv)(x)
-σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1)
cdims = DepthwiseConvDims(x, c.weight; stride=c.stride, padding=c.pad, dilation=c.dilation)
-σ.(depthwiseconv(x, c.weight, cdims) .+ b)
+if c.use_bias
+  σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1)
+  σ.(depthwiseconv(x, c.weight, cdims) .+ b)
+else
+  c.σ.(depthwiseconv(x, c.weight, cdims))
+end
end

function Base.show(io::IO, l::DepthwiseConv)
@@ -220,7 +236,7 @@ Data should be stored in WHCN order (width, height, # channels, # batches).
In other words, a 100×100 RGB image would be a `100×100×3×1` array,
and a batch of 50 would be a `100×100×3×50` array.

-Takes the keyword arguments `pad`, `stride` and `dilation`.
+Takes the keyword arguments `use_bias`, `pad`, `stride` and `dilation`.
"""
struct CrossCor{N,M,F,A,V}
σ::F
@@ -229,20 +245,21 @@ struct CrossCor{N,M,F,A,V}
stride::NTuple{N,Int}
pad::NTuple{M,Int}
dilation::NTuple{N,Int}
+use_bias::Bool
end

function CrossCor(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;
-stride = 1, pad = 0, dilation = 1) where {T,N}
+stride = 1, pad = 0, dilation = 1, use_bias = true) where {T,N}
stride = expand(Val(N-2), stride)
pad = expand(Val(2*(N-2)), pad)
dilation = expand(Val(N-2), dilation)
-return CrossCor(σ, w, b, stride, pad, dilation)
+return CrossCor(σ, w, b, stride, pad, dilation, use_bias)
end

CrossCor(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity;
-init = glorot_uniform, stride = 1, pad = 0, dilation = 1) where N =
+init = glorot_uniform, stride = 1, pad = 0, dilation = 1, use_bias = true) where N =
CrossCor(init(k..., ch...), zeros(ch[2]), σ,
-stride = stride, pad = pad, dilation = dilation)
+stride = stride, pad = pad, dilation = dilation, use_bias = use_bias)

@functor CrossCor

@@ -254,9 +271,13 @@ end
function (c::CrossCor)(x::AbstractArray)
# TODO: breaks gpu broadcast :(
# ndims(x) == ndims(c.weight)-1 && return squeezebatch(c(reshape(x, size(x)..., 1)))
-σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1)
cdims = DenseConvDims(x, c.weight; stride=c.stride, padding=c.pad, dilation=c.dilation)
-σ.(crosscor(x, c.weight, cdims) .+ b)
+if c.use_bias
+  σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1)
+  σ.(crosscor(x, c.weight, cdims) .+ b)
+else
+  c.σ.(crosscor(x, c.weight, cdims))
+end
end
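
For context, CrossCor is Conv without the kernel flip: `crosscor` slides the filter as given, while `conv` reverses it first. A toy Base-Julia sketch of the difference on a 1-d signal (hypothetical example, not from this diff):

x = Float64[1, 2, 3, 4, 5]
w = Float64[1, 0, -1]
xcorr = [sum(x[i:i+2] .* w)          for i in 1:3]  # kernel used as-is
conv1 = [sum(x[i:i+2] .* reverse(w)) for i in 1:3]  # kernel flipped first
# xcorr == [-2.0, -2.0, -2.0]; conv1 == [2.0, 2.0, 2.0]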

function Base.show(io::IO, l::CrossCor)
11 changes: 11 additions & 0 deletions test/layers/conv.jl
@@ -20,6 +20,17 @@ end
Dense(288, 10), softmax)

@test size(m(r)) == (10, 5)

+# Test bias switch
+bias = Conv(ones(Float32, 2, 2, 1, 3), ones(Float32, 3))
+ip = zeros(Float32, 28, 28, 1, 1)
+
+op = bias(ip)
+@test sum(op) == prod(size(op))
+
+bias = Conv(ones(Float32, 2, 2, 1, 3), ones(Float32, 3), use_bias = false)
+op = bias(ip)
+@test sum(op) === 0.f0
end
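
On the arithmetic in these assertions: the input is all zeros, so the convolution itself contributes nothing and each of the 27×27×3 outputs equals the bias value 1, making the sum equal to prod(size(op)); with use_bias = false the output is identically zero, hence exactly 0.0f0.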

@testset "asymmetric padding" begin