Commit f29c0b4: [nnx] don't share Rngs
1 parent: 535fc81

34 files changed: +678 −767

docs_nnx/api_reference/flax.nnx/helpers.rst (1 addition, 4 deletions)

```diff
@@ -4,10 +4,7 @@ helpers
 .. automodule:: flax.nnx
 .. currentmodule:: flax.nnx
 
-.. autoclass:: Dict
-  :members:
-.. autoclass:: List
-  :members:
+
 .. autoclass:: Sequential
   :members:
 .. autoclass:: TrainState
```

docs_nnx/api_reference/flax.nnx/training/optimizer.rst (3 additions, 0 deletions)

```diff
@@ -6,3 +6,6 @@ Optimizer
 
 .. autoclass:: Optimizer
   :members: __init__, update
+
+.. autoclass:: MutableArrayOptimizer
+  :members: __init__, update
```
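For context, a minimal sketch of the documented `nnx.Optimizer` surface; the new `MutableArrayOptimizer` is assumed to mirror the same `__init__`/`update` members listed above. The model, data, and learning rate are illustrative, and exact signatures have shifted across Flax releases:

```python
import jax.numpy as jnp
import optax
from flax import nnx

model = nnx.Linear(2, 3, rngs=nnx.Rngs(0))
optimizer = nnx.Optimizer(model, optax.sgd(1e-2))  # wraps an optax transform

x, y = jnp.ones((4, 2)), jnp.zeros((4, 3))

def loss_fn(model):
  return jnp.mean((model(x) - y) ** 2)

loss, grads = nnx.value_and_grad(loss_fn)(model)
optimizer.update(grads)  # applies the optax update to the params in place
```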

docs_nnx/guides/filters_guide.ipynb (1 addition, 1 deletion)

```diff
@@ -5,7 +5,7 @@
    "id": "95b08e64",
    "metadata": {},
    "source": [
-    "# Using Filters, grouping NNX variables \n",
+    "# Filters\n",
     "\n",
     "Flax NNX uses [`Filter`s](https://flax.readthedocs.io/en/latest/api_reference/flax.nnx/filterlib.html) extensively as a way to create [`nnx.State`](https://flax.readthedocs.io/en/latest/api_reference/flax.nnx/state.html#flax.nnx.State) groups in APIs, such as [`nnx.split`](https://flax.readthedocs.io/en/latest/api_reference/flax.nnx/graph.html#flax.nnx.split), [`nnx.state()`](https://flax.readthedocs.io/en/latest/api_reference/flax.nnx/graph.html#flax.nnx.state), and many of the [Flax NNX transformations (transforms)](https://flax.readthedocs.io/en/latest/guides/jax_and_nnx_transforms.html).\n",
     "\n",
```

docs_nnx/guides/filters_guide.md (1 addition, 1 deletion)

```diff
@@ -8,7 +8,7 @@ jupytext:
     jupytext_version: 1.13.8
 ---
 
-# Using Filters, grouping NNX variables
+# Filters
 
 Flax NNX uses [`Filter`s](https://flax.readthedocs.io/en/latest/api_reference/flax.nnx/filterlib.html) extensively as a way to create [`nnx.State`](https://flax.readthedocs.io/en/latest/api_reference/flax.nnx/state.html#flax.nnx.State) groups in APIs, such as [`nnx.split`](https://flax.readthedocs.io/en/latest/api_reference/flax.nnx/graph.html#flax.nnx.split), [`nnx.state()`](https://flax.readthedocs.io/en/latest/api_reference/flax.nnx/graph.html#flax.nnx.state), and many of the [Flax NNX transformations (transforms)](https://flax.readthedocs.io/en/latest/guides/jax_and_nnx_transforms.html).
 
```
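To ground the guide's claim about filters creating `nnx.State` groups, a minimal sketch using the public API (the `nnx.Linear` model is illustrative):

```python
from flax import nnx

model = nnx.Linear(2, 3, rngs=nnx.Rngs(0))

# Filters (here nnx.Param and the catch-all ...) decide which variables
# land in which State group.
graphdef, params, rest = nnx.split(model, nnx.Param, ...)

# merge reassembles the module from the graph definition and the groups.
model = nnx.merge(graphdef, params, rest)
```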

examples/nnx_toy_examples/06_scan_over_layers.py (0 additions, 1 deletion)

```diff
@@ -47,7 +47,6 @@ def create_block(rngs: nnx.Rngs):
     self.layers = create_block(rngs)
 
   def __call__(self, x: jax.Array) -> jax.Array:
-    @nnx.split_rngs(splits=self.n_layers)
     @nnx.scan
     def scan_fn(x: jax.Array, block: Block):
       x = block(x)
```
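The call site no longer splits the shared `Rngs`, matching the commit's theme. A hedged sketch of where `nnx.split_rngs` typically still appears, at layer creation, following the `create_block` pattern named in the hunk header (the body shown is an assumption, not this file's exact code):

```python
from flax import nnx

n_layers = 5

@nnx.split_rngs(splits=n_layers)        # one independent stream per layer
@nnx.vmap(in_axes=(0,), out_axes=0)     # build the stacked Block states
def create_block(rngs: nnx.Rngs):
  return Block(rngs=rngs)               # Block as defined in this example
```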

examples/nnx_toy_examples/10_fsdp_and_optimizer.py (15 additions, 5 deletions)

```diff
@@ -84,19 +84,29 @@ def init_optimizer_state(variable: nnx.Variable):
 
     self.lr = lr
     self.params = params
-    self.momentum: nnx.State = jax.tree.map(init_optimizer_state, self.params)
+    self.momentum: nnx.State = jax.tree.map(
+      init_optimizer_state,
+      self.params,
+      is_leaf=lambda x: isinstance(x, nnx.Variable | nnx.VariableState),
+    )
     self.decay = decay
 
   def update(self, grads: nnx.State):
     def update_fn(
       params: nnx.Variable, momentum: SGDState, grad: nnx.VariableState
     ):
       # v_t = β * v_{t-1} + (1 - β) * ∇J(θ_t)
-      momentum.value = self.decay * momentum + (1 - self.decay) * grad.value
+      momentum[...] = self.decay * momentum[...] + (1 - self.decay) * grad[...]
       # θ_{t+1} = θ_t - α * v_t
-      params.value -= self.lr * momentum
-
-    jax.tree.map(update_fn, self.params, self.momentum, grads)
+      params[...] -= self.lr * momentum[...]
+
+    jax.tree.map(
+      update_fn,
+      self.params,
+      self.momentum,
+      grads,
+      is_leaf=lambda x: isinstance(x, nnx.Variable | nnx.VariableState),
+    )
 
 
 @nnx.jit
```
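The new `is_leaf` predicate stops `jax.tree.map` from recursing into `nnx.Variable` objects, which appears to be needed once Variables themselves participate as pytree nodes; the `[...]` indexing reads and writes the wrapped array, exactly as the diff above does. A hedged standalone illustration (the toy tree is made up):

```python
import jax
import jax.numpy as jnp
from flax import nnx

params = {'w': nnx.Param(jnp.ones((2, 2))), 'b': nnx.Param(jnp.zeros(2))}

# With is_leaf, the callback receives each whole Variable rather than
# whatever tree.map would find by descending into it; v[...] accesses
# the underlying array.
halved = jax.tree.map(
  lambda v: v[...] * 0.5,
  params,
  is_leaf=lambda x: isinstance(x, nnx.Variable),
)
```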

examples/nnx_toy_examples/mutable_array_demo.py (2 additions, 2 deletions)

```diff
@@ -259,10 +259,10 @@ def update_fn(
 # compute the loss by calling the model with the inputs.
 @jax.jit
 def train_step(model: Model, optimizer: SGD, rngs: nnx.Rngs, x, y):
-  treedef, params, nondiff = nnx.split(model, nnx.Param, ...)
+  graphdef, params, nondiff = nnx.split(model, nnx.Param, ...)
 
   def loss_fn(params):
-    model = nnx.merge(treedef, params, nondiff)
+    model = nnx.merge(graphdef, params, nondiff)
     loss = jnp.mean((model(x, rngs=rngs) - y) ** 2)
     return loss
 
```
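The rename is cosmetic but aligns with NNX naming: `nnx.split` returns a `GraphDef` plus one `State` per filter, not a JAX `treedef`. A hedged sketch of the differentiable step this function builds (names taken from the diff; the gradient call is an assumed completion):

```python
import jax

graphdef, params, nondiff = nnx.split(model, nnx.Param, ...)

def loss_fn(params):
  model = nnx.merge(graphdef, params, nondiff)   # rebuild the module
  return jnp.mean((model(x, rngs=rngs) - y) ** 2)

grads = jax.grad(loss_fn)(params)  # differentiate w.r.t. params only
```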

flax/nnx/__init__.py (1 addition, 2 deletions)

```diff
@@ -31,7 +31,6 @@
 from .graph import GraphState as GraphState
 from .graph import PureState as PureState
 from .object import Object as Object
-from .helpers import Dict as Dict
 from .helpers import Sequential as Sequential
 from .helpers import TrainState as TrainState
 from .module import M as M
@@ -139,7 +138,7 @@
 from .training.metrics import Metric as Metric
 from .training.metrics import MultiMetric as MultiMetric
 from .training.optimizer import Optimizer as Optimizer
-from .training.optimizer import OptaxOptimizer as OptaxOptimizer
+from .training.optimizer import MutableArrayOptimizer as MutableArrayOptimizer
 from .transforms.autodiff import DiffState as DiffState
 from .transforms.autodiff import grad as grad
 from .transforms.autodiff import value_and_grad as value_and_grad
```

flax/nnx/bridge/module.py (1 addition, 2 deletions)

```diff
@@ -224,8 +224,7 @@ class ModuleBase:
 @tpe.dataclass_transform(field_specifiers=(dataclasses.field,))  # type: ignore[not-supported-yet]
 class Module(nnx_module.Module, ModuleBase, metaclass=ModuleMeta):
   def __init_subclass__(cls) -> None:
-    cls.__data__ = 'auto'
-    super().__init_subclass__()
+    super().__init_subclass__(pytree=False)
 
     cls = dataclasses.dataclass(repr=False)(cls)
     cls.__hash__ = object.__hash__  # type: ignore[method-assign]
```
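This replaces the internal `__data__ = 'auto'` marker with the `pytree=False` class keyword, the same keyword the `ToNNX` change below uses. A hedged sketch of what the keyword looks like at a subclass site (the class body is illustrative, and the opt-out-of-pytree semantics is inferred from this commit's usage, not documented here):

```python
from flax import nnx

# pytree=False is assumed to opt the class out of automatic pytree handling.
class Wrapper(nnx.Module, pytree=False):
  def __init__(self, din: int, dout: int, rngs: nnx.Rngs):
    self.linear = nnx.Linear(din, dout, rngs=rngs)
```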

flax/nnx/bridge/wrappers.py (1 addition, 6 deletions)

```diff
@@ -30,7 +30,6 @@
 from flax.nnx.statelib import State
 import jax
 from jax import tree_util as jtu
-from flax import config
 
 M = tp.TypeVar('M', bound=Module)
 
@@ -87,9 +86,7 @@ def lazy_init(fn: Module | tp.Callable[..., tp.Any], *args, **kwargs):
     _set_initializing(module, False)
   return fn
 
-PYTREE_DEFAULT = 'auto' if config.flax_mutable_array else None
-
-class ToNNX(Module):
+class ToNNX(Module, pytree=False):
   """A wrapper to turn any Linen module into an NNX module.
 
   The result NNX module can be used standalone with all NNX APIs, or as a submodule of
@@ -119,8 +116,6 @@ class ToNNX(Module):
     A stateful NNX module that behaves the same as the wrapped Linen module.
   """
 
-  __data__ = 'auto'
-
   def __init__(
     self,
     module: linen.Module,
```
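For orientation, a hedged sketch of the `ToNNX` wrapper this hunk touches, based on the public bridge API (the Linen module and shapes are illustrative; exact behavior may vary by Flax version):

```python
import jax.numpy as jnp
from flax import linen as nn
from flax import nnx
from flax.nnx import bridge

x = jnp.ones((1, 32))
model = bridge.ToNNX(nn.Dense(features=64), rngs=nnx.Rngs(0))
bridge.lazy_init(model, x)   # materializes the Linen variables on first trace
y = model(x)
```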
