feat: update training api to account for thunking

avik-pal · avik-pal · commit e1c3a4ae95ec · 2025-01-23T15:25:20.000-05:00
diff --git a/ext/LuxZygoteExt/training.jl b/ext/LuxZygoteExt/training.jl
@@ -1,8 +1,17 @@
 function Lux.Training.compute_gradients_impl(
         ::AutoZygote, objective_function::F, data, ts::Lux.Training.TrainState) where {F}
-    (loss, st, stats), back = Zygote.pullback(
-        objective_function, ts.model, ts.parameters, ts.states, data)
-    grads = back((one(loss), nothing, nothing))[2]
+    @static if pkgversion(Zygote) ≥ v"0.7-"
+        # Zygote 0.7 doesn't aggressively unthunk everything, so close over the model,
+        # states and data and differentiate with respect to the parameters only
+        (loss, st, stats), back = Zygote.pullback(
+            ps -> objective_function(ts.model, ps, ts.states, data), ts.parameters)
+        grads = only(back((one(loss), nothing, nothing)))
+    else
+        (loss, st, stats), back = Zygote.pullback(
+            objective_function, ts.model, ts.parameters, ts.states, data
+        )
+        grads = back((one(loss), nothing, nothing))[2]
+    end
     @set! ts.states = st
     return grads, loss, stats, ts
 end