FluxML · axsk · Feb 22, 2019 · Apr 12, 2021 · Apr 12, 2021 · Apr 12, 2021
diff --git a/src/lib/array.jl b/src/lib/array.jl
@@ -32,16 +32,23 @@ end
 
 @adjoint view(x::AbstractArray, inds...) = view(x, inds...), ∇getindex(x, inds)
 
-∇getindex(x::AbstractArray, inds) = dy -> begin
-  if inds isa  NTuple{<:Any, Integer}
-    dx = _zero(x, typeof(dy))
-    dx[inds...] = dy
-  else
-    dx = _zero(x, eltype(dy))
-    dxv = view(dx, inds...)
-    dxv .= accum.(dxv, _droplike(dy, dxv))
-  end
-  return (dx, map(_->nothing, inds)...)
+∇getindex(x::AbstractArray, inds) = dy -> (_zerosetindex(x, inds, dy), map(_->nothing, inds)...)  # pullback: gradient for x first, then one `nothing` per index
+
+function _zerosetindex(x, inds::NTuple{<:Any, Integer}, dy)  # method chosen by dispatch when every index is an Integer
+  dx = _zero(x, typeof(dy))  # buffer built from x by `_zero` (defined elsewhere) — presumably zero-filled; TODO confirm for all methods
+  dx[inds...] = dy  # plain assignment: a single position is addressed, so nothing needs accumulating
+  dx  # the filled buffer is the gradient with respect to x
+end
+
+function _zerosetindex(x, inds, dy)  # fallback method for any index tuple not matched by the all-Integer method
+  dx = _zero(x, eltype(dy))  # buffer built from x, keyed on the element type of dy rather than typeof(dy)
+  dxv = view(dx, inds...)  # window onto the indexed region of dx; writes go through to dx
+  dxv .= accum.(dxv, _droplike(dy, dxv))  # accumulate instead of assign — presumably so repeated indices sum; TODO confirm `_droplike` semantics
+  dx  # the filled buffer is the gradient with respect to x
+end
+
+@adjoint function _zerosetindex(x, inds, dy)  # custom adjoint so the pullback built by ∇getindex can itself be differentiated (nested AD)
+  _zerosetindex(x, inds, dy), ddx -> (nothing, nothing, ddx[inds...])  # no gradient for x or inds; dy's gradient is ddx read back at inds
 end
 
 _zero(xs::AbstractArray{<:Number}, T::Type{Nothing}) = fill!(similar(xs), zero(eltype(xs)))

diff --git a/test/features.jl b/test/features.jl
@@ -481,3 +481,21 @@ end
   Zygote.gradient(loss_adjoint,[1.0])
   @test x[1] == x[2]
 end
+
+# Basic nested: the second derivative of x^4 is 12x^2, i.e. 12 at x = 1
+f_nested(x) = x^4
+@test f_nested''(1.0) ≈ 12.0
+
+# Nested AD for `sum`: the inner gradient is cos(sum(x)); its derivative is -sin(sum(x))
+@test gradient([1.0, 2.0]) do x
+    gradient(x) do x
+        sin(sum(x))
+    end[1][1]
+end[1][1] ≈ -sin(3.0)
+
+# Nested AD for getindex: the inner gradient is cos(x[1]); its derivative is -sin(x[1])
+@test gradient([1.0, 2.0]) do x
+    gradient(x) do x
+        sin(x[1])
+    end[1][1]
+end[1][1] ≈ -sin(1.0)
diff --git a/test/gradcheck.jl b/test/gradcheck.jl
@@ -4,6 +4,7 @@ using Zygote: gradient
 using Base.Broadcast: broadcast_shape
 using Distributed: pmap, CachingPool, workers
 import FiniteDifferences
+import ForwardDiff
 
 function ngradient(f, xs::AbstractArray...)
   grads = zero.(xs)
@@ -1672,3 +1673,27 @@ end
     gradient(x->norm(x*[1im, 1]), 1.23)
     gradient(x->norm(x*[1im 1]), 1.23)
 end
+
+# Currently disabled (via the `if false` guard below) pending improvements in Zygote and Base
+if false  # the guarded body is never evaluated; it records the intended test
+  @testset "third order AD (indexing)" begin
+    # Nested AD for getindex: three nested `gradient` calls through the slice x[1:2]
+    grad_tracker = gradient([1.0, 2.0, 3.0]) do x
+      sum(gradient(x) do x
+        sum(gradient(x) do x
+          sum(x[1:2])^4
+        end[1])
+      end[1])
+    end[1]
+    # We compare to ForwardDiff, since the high order derivative is not
+    # numerically stable under finite differencing.
+    grad_forward = ForwardDiff.gradient([1.0, 2.0, 3.0]) do x
+      sum(ForwardDiff.gradient(x) do x
+        sum(ForwardDiff.gradient(x) do x
+          sum(x[1:2])^4
+        end)
+      end)
+    end
+    @test grad_tracker ≈ grad_forward ≈ [288.0, 288.0, 0.0]  # 96 * (x[1] + x[2]) = 288 for the sliced entries; x[3] is never read, so 0
+  end
+end
diff --git a/test/utils.jl b/test/utils.jl
@@ -3,13 +3,8 @@ using Zygote: hessian_dual, hessian_reverse
 
 @testset "hessian: $hess" for hess in [hessian_dual, hessian_reverse]
 
-  if hess == hessian_dual
-    @test hess(x -> x[1]*x[2], randn(2)) ≈ [0 1; 1 0]
-    @test hess(((x,y),) -> x*y, randn(2)) ≈ [0 1; 1 0]  # original docstring version
-  else
-    @test_broken hess(x -> x[1]*x[2], randn(2)) ≈ [0 1; 1 0]  # can't differentiate ∇getindex
-    @test_broken hess(((x,y),) -> x*y, randn(2)) ≈ [0 1; 1 0]
-  end
+  @test hess(x -> x[1]*x[2], randn(2)) ≈ [0 1; 1 0]
+  @test hess(((x,y),) -> x*y, randn(2)) ≈ [0 1; 1 0]  # original docstring version
   @test hess(x -> sum(x.^3), [1 2; 3 4]) ≈ Diagonal([6, 18, 12, 24])
   @test hess(sin, pi/2) ≈ -1