Remove activation on final layer of KeyNet and ICNN (#702)

marcocuturi · claude · web-flow · commit b79b8497d08b · 2026-06-13T22:15:56.000+02:00
* Remove activation on final layer of KeyNet and ICNN

The output layer of both KeyNet.gradient and ICNN.__call__ previously
applied the activation function (default ReLU) after the final layer.
This forced the outputs to be non-negative: KeyNet's predicted vectors
could not take signed values, and ICNN's scalar potential was clamped to
be non-negative.

Make the final layer linear in both networks. Convexity of the ICNN
output is preserved (a non-negatively weighted combination of convex
features remains convex).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

* fix: yapf formatting + linen potential in cmonge_gap test

Reformat the final-layer loop in ICNN/KeyNet to satisfy yapf (the CI
"code" Lint check). Switch conditional_monge_gap_test to LinenPotentialMLP
so it uses the linen init/apply API it was written for, matching
monge_gap_test (the nnx PotentialMLP now requires input_dim/rngs).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
diff --git a/src/ott/neural/networks/icnn.py b/src/ott/neural/networks/icnn.py
@@ -225,11 +225,14 @@ def __call__(self, x: jax.Array) -> jax.Array:
 
     z = self._act_fn_call(self.wx0(x))
 
-    for wx, wz in zip(self.wx_layers, self.wz_layers, strict=True):
-      if wx is not None:
-        z = self._act_fn_call(wz(z) + wx(x))
-      else:
-        z = self._act_fn_call(wz(z))
+    num_layers = len(self.wz_layers)
+    for i, (wx,
+            wz) in enumerate(zip(self.wx_layers, self.wz_layers, strict=True)):
+      z = wz(z) + wx(x) if wx is not None else wz(z)
+      # The final layer is linear: no activation, so the (convex) potential
+      # is an unconstrained combination of the last hidden features.
+      if i != num_layers - 1:
+        z = self._act_fn_call(z)
 
     if self.pos_def_potentials is not None:
       z = z + self.pos_def_potentials(x)
@@ -399,11 +402,14 @@ def gradient(self, x: jax.Array) -> jax.Array:
     batch_size, _ = x.shape
     z = self._act_fn_call(self.wx0(x))
 
-    for wx, wz in zip(self.wx_layers, self.wz_layers, strict=True):
-      if wx is not None:
-        z = self._act_fn_call(wz(z) + wx(x))
-      else:
-        z = self._act_fn_call(wz(z))
+    num_layers = len(self.wz_layers)
+    for i, (wx,
+            wz) in enumerate(zip(self.wx_layers, self.wz_layers, strict=True)):
+      z = wz(z) + wx(x) if wx is not None else wz(z)
+      # The final layer is linear: no activation, so the vector output can
+      # take arbitrary values (e.g. signed gradients).
+      if i != num_layers - 1:
+        z = self._act_fn_call(z)
 
     if self._resnet:
       z = x + z
diff --git a/tests/neural/methods/conditional_monge_gap_test.py b/tests/neural/methods/conditional_monge_gap_test.py
@@ -207,7 +207,7 @@ def test_non_negativity_neural_map(
     rng1, rng2 = jax.random.split(rng)
 
     source = jax.random.normal(rng1, (n, n_features))
-    model = potentials.PotentialMLP(dim_hidden=[8, 8], is_potential=False)
+    model = potentials.LinenPotentialMLP(dim_hidden=[8, 8], is_potential=False)
     params = model.init(rng2, x=source[0])
     target = model.apply(params, source)
     condition = jnp.repeat(jnp.arange(k), per_cond)