[nnx] support Array leaves in graph nodes

cgarciae · cgarciae · commit 18d375040592 · 2025-03-12T20:37:24.000-07:00
diff --git a/examples/nnx_toy_examples/07_array_leaves.py b/examples/nnx_toy_examples/07_array_leaves.py
@@ -0,0 +1,99 @@
+# Copyright 2024 The Flax Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# %%
+import jax
+import jax.numpy as jnp
+import matplotlib.pyplot as plt
+import numpy as np
+import optax
+
+from flax import nnx, struct
+
+X = np.linspace(0, 1, 100)[:, None]  # 100 inputs in [0, 1], shape (100, 1)
+Y = 0.8 * X**2 + 0.1 + np.random.normal(0, 0.1, size=X.shape)  # noisy quadratic
+
+
+def dataset(batch_size):  # endless generator of random (x, y) minibatches
+  while True:  # never terminates on its own; the consumer must stop iterating
+    idx = np.random.choice(len(X), size=batch_size)  # batch_size row indices
+    yield X[idx], Y[idx]
+
+class Linear(nnx.Module):  # dense layer storing its parameters as raw Arrays
+  def __init__(self, din: int, dout: int, *, rngs: nnx.Rngs):
+    self.w = jax.random.normal(rngs.params(), (din, dout))  # (din, dout) weights
+    self.b = jnp.zeros((dout,))  # (dout,) bias, initialised to zero
+
+  def __call__(self, x):
+    return x @ self.w + self.b  # affine map: x @ w + b
+
+
+class MLP(nnx.Module):  # two Linear layers plus a call counter
+  def __init__(self, din, dhidden, dout, *, rngs: nnx.Rngs):
+    self.count = jnp.array(0)  # scalar Array leaf, incremented in __call__
+    self.linear1 = Linear(din, dhidden, rngs=rngs)  # input -> hidden
+    self.linear2 = Linear(dhidden, dout, rngs=rngs)  # hidden -> output
+
+  def __call__(self, x):
+    self.count += 1  # rebinds the attribute to a new Array (Arrays are immutable)
+    return self.linear2(nnx.relu(self.linear1(x)))  # linear1 -> ReLU -> linear2
+
+def is_param(path, value):
+  # A leaf is trainable when the last key of its path is a Linear 'w' or 'b'.
+  return path[-1] in ('w', 'b')
+
+model = MLP(din=1, dhidden=32, dout=1, rngs=nnx.Rngs(0))  # seed 0, 32 hidden units
+tx = optax.sgd(1e-3)  # SGD with learning rate 1e-3
+optimizer = nnx.Optimizer(model, tx, wrt=is_param)  # is_param is the `wrt` filter
+
+
+@nnx.jit
+def train_step(model: MLP, optimizer: nnx.Optimizer, batch):
+  inputs, targets = batch
+
+  def loss_fn(model: MLP):
+    # Mean squared error between the targets and the model's predictions.
+    return jnp.mean((targets - model(inputs)) ** 2)
+
+  # Differentiate argument 0 (the model) using is_param as the DiffState filter.
+  grad_fn = nnx.grad(loss_fn, argnums=nnx.DiffState(0, is_param))
+  optimizer.update(grad_fn(model))
+
+
+@nnx.jit
+def test_step(model: MLP, batch):
+  # Mean squared error of the model on `batch`, returned as a one-entry log dict.
+  inputs, targets = batch
+  squared_error = (targets - model(inputs)) ** 2
+  return {'loss': jnp.mean(squared_error)}
+
+
+total_steps = 10_000
+# Pairing the endless batch stream with range(total_steps) bounds the loop;
+# range comes first so no extra batch is drawn once the step budget runs out.
+for step, batch in zip(range(total_steps), dataset(32)):
+  train_step(model, optimizer, batch)
+
+  if step % 1000 == 0:
+    # Report the loss on the full dataset every 1000 steps.
+    logs = test_step(model, (X, Y))
+    print(f"step: {step}, loss: {logs['loss']}")
+
+print('times called:', model.count)  # counter incremented in MLP.__call__
+
+y_pred = model(X)  # forward pass over all inputs, called outside nnx.jit
+
+plt.scatter(X, Y, color='blue')  # noisy training data
+plt.plot(X, y_pred, color='black')  # model predictions over the same inputs
+plt.show()
diff --git a/flax/nnx/graph.py b/flax/nnx/graph.py
@@ -710,9 +710,7 @@ def _graph_fingerprint(
         append_fn(variable_index)
         for key_value in value._var_metadata.items():
           append_fn(key_value)
-    else:
-      if isinstance(value, (jax.Array, np.ndarray)):
-        raise ValueError(f'Arrays leaves are not supported: {value}')
+    elif not isinstance(value, (jax.Array, np.ndarray)):
       append_fn(value)
 
 
@@ -1146,8 +1144,16 @@ def _update_variable(node: Variable, value):
         raise ValueError(f'Expected a subgraph for {key!r}, but got: {value!r}')
       _graph_update_dynamic(current_value, value)
     else:
-      # case 3: state leaf is being updated
-      if not isinstance(current_value, Variable):
+      if isinstance(current_value, jax.Array | np.ndarray):
+        if isinstance(node_impl, PytreeNodeImpl):
+          raise ValueError(
+            f'Cannot set key {key!r} on immutable node of '
+            f'type {type(node).__name__}'
+          )
+        node_impl.set_key(node, key, value)
+        continue
+      elif not isinstance(current_value, Variable):
+        # case 3: state leaf is being updated
         raise ValueError(
           f'Trying to update a non-Variable attribute {key!r} with a Variable: '
           f'{value!r}'
@@ -1275,7 +1281,8 @@ def _cached_partial(f: tp.Callable[..., tp.Any], *cached_args):
   cached_ref_index: RefMap = RefMap()
 
   def create_static_cache(x):
-    if is_graph_node(x):
+    # TODO(cgarciae): support Array attribute updates for graph nodes
+    if is_graph_node(x) or isinstance(x, Variable):
       graphdef, flat_state = flatten(
         x, with_paths=True, return_variables=True, ref_index=original_ref_index
       )
@@ -1284,11 +1291,6 @@ def create_static_cache(x):
       # clone but keep the same variable references
       node_cache = unflatten(graphdef, flat_state, index_ref=index_ref)
       cached_new_ref_index = RefMap()
-      _fp = fingerprint(
-        node_cache,
-        ref_index=cached_ref_index,
-        new_ref_index=cached_new_ref_index,
-      )
       cached_ref_index.update(cached_new_ref_index)
       cache[node_cache] = StaticCache.create(
         graphdef, paths, variables, cached_new_ref_index