Merged
Changes from 3 commits (41 commits in total):
5ea6a33  make bias optional (Sep 27, 2019)
9f2ac8f  ditto remaining conv layers (Sep 27, 2019)
a801fcb  docstrings (Sep 27, 2019)
dced8c0  use ZeroType (Oct 1, 2019)
1fe3217  add to docs (Oct 1, 2019)
55ef7c1  add weight and bias kwargs (Oct 5, 2019)
48a305b  ditto remaining layers (Oct 5, 2019)
e97d61f  fixes (Oct 5, 2019)
d00f833  rm ZeroType (Oct 5, 2019)
2ae3ad3  doc fixes (Oct 5, 2019)
214f71f  add N (Oct 5, 2019)
a1e826b  fixes (Oct 5, 2019)
f3904b4  add ZeroType back (Oct 8, 2019)
040697f  add bias and weight kwarg (Oct 8, 2019)
b596faa  tests bias switch (Oct 8, 2019)
95c5845  document bias switch (Oct 8, 2019)
49ea43e  ZeroType => Zeros (Oct 8, 2019)
c85bad4  replace weight with filter (Oct 8, 2019)
4a183ae  make Zeros a dimensionlesss number (Oct 22, 2019)
7c90fb4  use array to define Zeros (Oct 23, 2019)
a4a987f  hook into bcasting (Nov 7, 2019)
e89b8eb  fixes (Nov 12, 2019)
eb41715  define manual rules (Nov 19, 2019)
2455630  cleaner API (Nov 27, 2019)
ec872bb  test that bias has no grads with Zeros (Nov 27, 2019)
f39e184  rm Zeros warning (Dec 9, 2019)
894c075  rm Zeros setindex (Dec 9, 2019)
a72ca2b  fix args (Dec 9, 2019)
b9fbee1  ::typeof(op) -> op (Jan 31, 2020)
bc20103  no-op copy (Jan 31, 2020)
f889d0c  add kwarg constructors (Feb 26, 2020)
58211e3  docs improve (Feb 26, 2020)
cd93179  more docs and constructors (Feb 26, 2020)
cf82393  type signatures (Feb 26, 2020)
20e78e2  docs fix (Feb 26, 2020)
7e308e7  rm unneccesary fns (Mar 4, 2020)
d8e44fc  correct broadcasting for addition (Mar 4, 2020)
5086c0f  merge conflicts (Apr 29, 2020)
534809a  move zeros to its own file (Apr 29, 2020)
29215fa  comment on possible future deprecations (Apr 29, 2020)
8f877f2  quick fix (MikeInnes, May 1, 2020)
77 changes: 49 additions & 28 deletions src/layers/conv.jl
@@ -21,7 +21,7 @@ Data should be stored in WHCN order (width, height, # channels, # batches).
In other words, a 100×100 RGB image would be a `100×100×3×1` array,
and a batch of 50 would be a `100×100×3×50` array.

-Takes the keyword arguments `pad`, `stride` and `dilation`.
+Takes the keyword arguments `use_bias`, `pad`, `stride` and `dilation`.
"""
struct Conv{N,M,F,A,V}
σ::F
@@ -30,29 +30,34 @@ struct Conv{N,M,F,A,V}
stride::NTuple{N,Int}
pad::NTuple{M,Int}
dilation::NTuple{N,Int}
+use_bias::Bool
end

function Conv(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;
-stride = 1, pad = 0, dilation = 1) where {T,N}
+stride = 1, pad = 0, dilation = 1, use_bias = true) where {T,N}
Review comment (Member):
It is weird, though, that when calling this constructor with use_bias = false one still has to pass a bias vector b. I would suggest the following non-breaking change instead of the use_bias flag:

  • relax the signature to Conv(w::AbstractArray{T,N}, b::Union{Nothing,AbstractVector{T}}, σ = identity; ...)
  • and have a call to Conv(w, nothing) construct a Conv layer with no bias.

Reply (Member Author):
Should be able to support Conv(w, nothing) and make it a bit more extensible now, I think.
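
A minimal sketch of the suggested pattern (hypothetical stand-alone names, not code from this diff; the point is the dispatch on Union{Nothing,AbstractVector}):

# Hypothetical illustration of the reviewer's suggestion: accept `nothing`
# for the bias and branch by dispatch, so no extra flag is stored.
struct ToyConv{F,A,V}
  σ::F
  weight::A
  bias::V                      # an AbstractVector, or `nothing` for no bias
end

ToyConv(w::AbstractArray, b::Union{Nothing,AbstractVector}, σ = identity) =
  ToyConv(σ, w, b)

add_bias(y, b::AbstractVector) = y .+ reshape(b, 1, 1, :, 1)
add_bias(y, ::Nothing) = y     # no-op for a layer built as ToyConv(w, nothing)

layer = ToyConv(rand(Float32, 3, 3, 1, 8), nothing)  # bias-free layer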

stride = expand(Val(N-2), stride)
pad = expand(Val(2*(N-2)), pad)
dilation = expand(Val(N-2), dilation)
-return Conv(σ, w, b, stride, pad, dilation)
+return Conv(σ, w, b, stride, pad, dilation, use_bias)
end

Conv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity;
-init = glorot_uniform, stride = 1, pad = 0, dilation = 1) where N =
+init = glorot_uniform, stride = 1, pad = 0, dilation = 1, use_bias = true) where N =
Conv(init(k..., ch...), zeros(ch[2]), σ,
-stride = stride, pad = pad, dilation = dilation)
+stride = stride, pad = pad, dilation = dilation, use_bias = use_bias)
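
With the kwarg threaded through, disabling the bias at construction time is one keyword. A usage sketch against this revision of the PR (later commits rework this API, so `use_bias` is specific to these commits):

using Flux
m = Conv((3, 3), 1 => 16, relu, use_bias = false)  # convolution without a bias term
x = rand(Float32, 28, 28, 1, 8)
size(m(x))  # (26, 26, 16, 8)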

@functor Conv

function (c::Conv)(x::AbstractArray)
# TODO: breaks gpu broadcast :(
# ndims(x) == ndims(c.weight)-1 && return squeezebatch(c(reshape(x, size(x)..., 1)))
-σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1)
cdims = DenseConvDims(x, c.weight; stride=c.stride, padding=c.pad, dilation=c.dilation)
-σ.(conv(x, c.weight, cdims) .+ b)
+if c.use_bias
+  σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1)
+  σ.(conv(x, c.weight, cdims) .+ b)
+else
+  c.σ.(conv(x, c.weight, cdims))
+end
end
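
The `reshape` above is what lines the per-channel bias up with a WHCN feature map: it produces a `1×1×C×1` array (one leading 1 per spatial dimension) that broadcasts over width, height and batch. A self-contained Base-Julia sketch for the 2-d case:

b  = Float32[1, 2, 3]            # one bias per output channel, C = 3
b4 = reshape(b, 1, 1, :, 1)      # what map(_->1, c.stride)..., :, 1 yields when N = 2
y  = zeros(Float32, 5, 5, 3, 2)  # a W×H×C×N feature map
out = y .+ b4                    # adds bias k to every pixel of channel k
size(out)                        # (5, 5, 3, 2)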

function Base.show(io::IO, l::Conv)
@@ -78,7 +83,7 @@ Standard convolutional transpose layer. `size` should be a tuple like `(2, 2)`.
Data should be stored in WHCN order. In other words, a 100×100 RGB image would
be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array.

-Takes the keyword arguments `pad`, `stride` and `dilation`.
+Takes the keyword arguments `use_bias`, `pad`, `stride` and `dilation`.
"""
struct ConvTranspose{N,M,F,A,V}
σ::F
@@ -87,20 +92,21 @@ struct ConvTranspose{N,M,F,A,V}
stride::NTuple{N,Int}
pad::NTuple{M,Int}
dilation::NTuple{N,Int}
+use_bias::Bool
end

function ConvTranspose(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;
-stride = 1, pad = 0, dilation = 1) where {T,N}
+stride = 1, pad = 0, dilation = 1, use_bias = true) where {T,N}
stride = expand(Val(N-2), stride)
pad = expand(Val(2*(N-2)), pad)
dilation = expand(Val(N-2), dilation)
-return ConvTranspose(σ, w, b, stride, pad, dilation)
+return ConvTranspose(σ, w, b, stride, pad, dilation, use_bias)
end

ConvTranspose(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity;
-init = glorot_uniform, stride = 1, pad = 0, dilation = 1) where N =
+init = glorot_uniform, stride = 1, pad = 0, dilation = 1, use_bias = true) where N =
ConvTranspose(init(k..., reverse(ch)...), zeros(ch[2]), σ,
-stride = stride, pad = pad, dilation = dilation)
+stride = stride, pad = pad, dilation = dilation, use_bias = use_bias)

@functor ConvTranspose

@@ -120,9 +126,13 @@ end

function (c::ConvTranspose)(x::AbstractArray)
# ndims(x) == ndims(c.weight)-1 && return squeezebatch(c(reshape(x, size(x)..., 1)))
-σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1)
cdims = conv_transpose_dims(c, x)
-return σ.(∇conv_data(x, c.weight, cdims) .+ b)
+if c.use_bias
+  σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1)
+  σ.(∇conv_data(x, c.weight, cdims) .+ b)
+else
+  c.σ.(∇conv_data(x, c.weight, cdims))
+end
end
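
Note that the forward pass of ConvTranspose goes through `∇conv_data`, the adjoint of `conv` with respect to its input, which is what grows the spatial dimensions. A usage sketch (again assuming this revision's `use_bias` kwarg):

using Flux
up = ConvTranspose((2, 2), 3 => 1, stride = 2, use_bias = false)
x = rand(Float32, 14, 14, 3, 1)
size(up(x))  # (28, 28, 1, 1): (14 - 1) * 2 + 2 = 28 per spatial dim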

function Base.show(io::IO, l::ConvTranspose)
@@ -148,7 +158,7 @@ Note that `out` must be an integer multiple of `in`.
Data should be stored in WHCN order. In other words, a 100×100 RGB image would
be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array.

-Takes the keyword arguments `pad`, `stride` and `dilation`.
+Takes the keyword arguments `use_bias`, `pad`, `stride` and `dilation`.
"""
struct DepthwiseConv{N,M,F,A,V}
σ::F
@@ -157,35 +167,41 @@ struct DepthwiseConv{N,M,F,A,V}
stride::NTuple{N,Int}
pad::NTuple{M,Int}
dilation::NTuple{N,Int}
+use_bias::Bool
end

function DepthwiseConv(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;
-stride = 1, pad = 0, dilation = 1) where {T,N}
+stride = 1, pad = 0, dilation = 1, use_bias = true) where {T,N}
stride = expand(Val(N-2), stride)
pad = expand(Val(2*(N-2)), pad)
dilation = expand(Val(N-2), dilation)
-return DepthwiseConv(σ, w, b, stride, pad, dilation)
+return DepthwiseConv(σ, w, b, stride, pad, dilation, use_bias)
end

function DepthwiseConv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity;
-init = glorot_uniform, stride = 1, pad = 0, dilation = 1) where N
+init = glorot_uniform, stride = 1, pad = 0, dilation = 1, use_bias = true) where N
@assert ch[2] % ch[1] == 0 "Output channels must be integer multiple of input channels"
return DepthwiseConv(
init(k..., div(ch[2], ch[1]), ch[1]),
zeros(ch[2]),
σ;
stride = stride,
pad = pad,
-dilation = dilation
+dilation = dilation,
+use_bias = use_bias
)
end
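
The assertion enforces the depthwise constraint: each input channel gets its own `out ÷ in` filters, so `out` must be an integer multiple of `in`. A usage sketch (assuming this revision's `use_bias` kwarg):

using Flux
dw = DepthwiseConv((3, 3), 4 => 8, use_bias = false)  # channel multiplier 2
x = rand(Float32, 10, 10, 4, 1)
size(dw(x))  # (8, 8, 8, 1)
# DepthwiseConv((3, 3), 4 => 6) would trip the assertion: 6 is not a multiple of 4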

@functor DepthwiseConv

function (c::DepthwiseConv)(x)
-σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1)
cdims = DepthwiseConvDims(x, c.weight; stride=c.stride, padding=c.pad, dilation=c.dilation)
-σ.(depthwiseconv(x, c.weight, cdims) .+ b)
+if c.use_bias
+  σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1)
+  σ.(depthwiseconv(x, c.weight, cdims) .+ b)
+else
+  c.σ.(depthwiseconv(x, c.weight, cdims))
+end
end

function Base.show(io::IO, l::DepthwiseConv)
@@ -220,7 +236,7 @@ Data should be stored in WHCN order (width, height, # channels, # batches).
In other words, a 100×100 RGB image would be a `100×100×3×1` array,
and a batch of 50 would be a `100×100×3×50` array.

-Takes the keyword arguments `pad`, `stride` and `dilation`.
+Takes the keyword arguments `use_bias`, `pad`, `stride` and `dilation`.
"""
struct CrossCor{N,M,F,A,V}
σ::F
@@ -229,20 +245,21 @@ struct CrossCor{N,M,F,A,V}
stride::NTuple{N,Int}
pad::NTuple{M,Int}
dilation::NTuple{N,Int}
+use_bias::Bool
end

function CrossCor(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;
-stride = 1, pad = 0, dilation = 1) where {T,N}
+stride = 1, pad = 0, dilation = 1, use_bias = true) where {T,N}
stride = expand(Val(N-2), stride)
pad = expand(Val(2*(N-2)), pad)
dilation = expand(Val(N-2), dilation)
-return CrossCor(σ, w, b, stride, pad, dilation)
+return CrossCor(σ, w, b, stride, pad, dilation, use_bias)
end

CrossCor(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity;
-init = glorot_uniform, stride = 1, pad = 0, dilation = 1) where N =
+init = glorot_uniform, stride = 1, pad = 0, dilation = 1, use_bias = true) where N =
CrossCor(init(k..., ch...), zeros(ch[2]), σ,
-stride = stride, pad = pad, dilation = dilation)
+stride = stride, pad = pad, dilation = dilation, use_bias = use_bias)

@functor CrossCor

@@ -254,9 +271,13 @@ end
function (c::CrossCor)(x::AbstractArray)
# TODO: breaks gpu broadcast :(
# ndims(x) == ndims(c.weight)-1 && return squeezebatch(c(reshape(x, size(x)..., 1)))
-σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1)
cdims = DenseConvDims(x, c.weight; stride=c.stride, padding=c.pad, dilation=c.dilation)
-σ.(crosscor(x, c.weight, cdims) .+ b)
+if c.use_bias
+  σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1)
+  σ.(crosscor(x, c.weight, cdims) .+ b)
+else
+  c.σ.(crosscor(x, c.weight, cdims))
+end
end
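
For context, CrossCor is Conv without the kernel flip: `crosscor` slides the filter as given, while `conv` reverses it first. A toy Base-Julia sketch of the difference on a 1-d signal (hypothetical example, not from this diff):

x = Float64[1, 2, 3, 4, 5]
w = Float64[1, 0, -1]
xcorr = [sum(x[i:i+2] .* w)          for i in 1:3]  # kernel used as-is
conv1 = [sum(x[i:i+2] .* reverse(w)) for i in 1:3]  # kernel flipped first
# xcorr == [-2.0, -2.0, -2.0]; conv1 == [2.0, 2.0, 2.0]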

function Base.show(io::IO, l::CrossCor)
11 changes: 11 additions & 0 deletions test/layers/conv.jl
@@ -20,6 +20,17 @@ end
Dense(288, 10), softmax)

@test size(m(r)) == (10, 5)

+# Test bias switch
+bias = Conv(ones(Float32, 2, 2, 1, 3), ones(Float32, 3))
+ip = zeros(Float32, 28, 28, 1, 1)
+
+op = bias(ip)
+@test sum(op) == prod(size(op))
+
+bias = Conv(ones(Float32, 2, 2, 1, 3), ones(Float32, 3), use_bias = false)
+op = bias(ip)
+@test sum(op) === 0.f0
end
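
On the arithmetic in these assertions: the input is all zeros, so the convolution itself contributes nothing and each of the 27×27×3 outputs equals the bias value 1, making the sum equal to prod(size(op)); with use_bias = false the output is identically zero, hence exactly 0.0f0.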

@testset "asymmetric padding" begin