Merged

24 commits
8696c3d  add nn.init.kaiming_uniform_  (zhiminzhang0830, Aug 7, 2025)
87d9313  update kaiming_uniform_  (zhiminzhang0830, Aug 11, 2025)
5b8a1a5  update unit test for kaiming_uniform_  (zhiminzhang0830, Aug 11, 2025)
3eaa19b  add nn.init.kaiming_uniform_  (zhiminzhang0830, Aug 7, 2025)
b8e6b9c  update kaiming_uniform_  (zhiminzhang0830, Aug 11, 2025)
e791c18  update unit test for kaiming_uniform_  (zhiminzhang0830, Aug 11, 2025)
e2b2cc9  add xavier_uniform_, kaiming_normal_, uniform_  (zhiminzhang0830, Aug 11, 2025)
d2d614a  add unit test for xavier_uniform_, kaiming_normal_, uniform_  (zhiminzhang0830, Aug 11, 2025)
50cfb5c  add xavier_normal_ and its unit test  (zhiminzhang0830, Aug 11, 2025)
2c08d23  add normal_ and its unit test  (zhiminzhang0830, Aug 11, 2025)
c4c6917  fix: remove 'block' parameter from init.*() function  (zhiminzhang0830, Aug 11, 2025)
d31e3d3  fix  (zhiminzhang0830, Aug 11, 2025)
b5ccf0a  add nn.init.trunc_normal_ and its unit test  (zhiminzhang0830, Aug 11, 2025)
31cdc8b  add nn.init.constant_, nn.init.ones_, nn.init.zeros_  (zhiminzhang0830, Aug 11, 2025)
44d9d26  support paddle.pir.Value type  (zhiminzhang0830, Aug 12, 2025)
5afa04c  add dirac_, eye_, orthogonal_  (zhiminzhang0830, Aug 12, 2025)
ecb4da0  update unit test for nn.init.*  (zhiminzhang0830, Aug 12, 2025)
0c8bfd1  update init  (zhiminzhang0830, Aug 12, 2025)
4d4334f  add paddle.pir.Value  (zhiminzhang0830, Aug 12, 2025)
08a85c6  update unit test for nn.init.orthogonal_  (zhiminzhang0830, Aug 12, 2025)
1d4550e  Merge remote-tracking branch 'upstream/develop' into init  (zhiminzhang0830, Aug 13, 2025)
d330635  fix unit test for nn.init.eye_  (zhiminzhang0830, Aug 14, 2025)
02555a7  fix: skip unit test on dcu  (zhiminzhang0830, Aug 14, 2025)
d10e3bc  Merge remote-tracking branch 'upstream/develop' into init  (zhiminzhang0830, Aug 14, 2025)
3 changes: 3 additions & 0 deletions python/paddle/nn/__init__.py
@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle.nn import init as init
Contributor:
There's no need to import init here; paddle.nn.init should automatically reach the functions in init.py. That's how torch does it.

zhiminzhang0830 (Contributor Author), Aug 11, 2025:

from . import functional, initializer, quant, utils # noqa: F401
from .clip import ClipGradByGlobalNorm, ClipGradByNorm, ClipGradByValue
from .decode import BeamSearchDecoder, dynamic_decode
@@ -319,4 +321,5 @@
    'LPPool2D',
    'ZeroPad1D',
    'ZeroPad3D',
    'init',
Contributor:
Is this meant to be exported as an API? It doesn't seem like it needs to be exported.

zhiminzhang0830 (Contributor Author):
It's probably because of __all__; if 'init' isn't exported, the following usage raises an error:

import paddle
tensor = paddle.zeros([32, 64])
paddle.nn.init.kaiming_uniform_(tensor)
AttributeError: module 'paddle.nn' has no attribute 'init'

]
68 changes: 68 additions & 0 deletions python/paddle/nn/init.py
@@ -0,0 +1,68 @@
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math

import paddle

from .initializer.initializer import calculate_gain, compute_fans


def _no_grad_uniform_(tensor, a, b):
    with paddle.no_grad():
        tensor.set_value(
Contributor:
Can this run under static graph mode? If it can't, consider reusing the existing initializers, instantiating one first and then calling it:

init = paddle.nn.initializer()
init(param)
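(A concrete reading of the sketch above, assuming the class-based initializer API; KaimingUniform is used here only as an illustrative example:)

init = paddle.nn.initializer.KaimingUniform()
init(param)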

zhiminzhang0830 (Contributor Author): Fixed.

            paddle.uniform(shape=tensor.shape, dtype=tensor.dtype, min=a, max=b)
        )
    return tensor


def _calculate_correct_fan(tensor, mode):
    mode = mode.lower()
    valid_modes = ["fan_in", "fan_out"]
    if mode not in valid_modes:
        raise ValueError(
            f"Mode {mode} not supported, please use one of {valid_modes}"
        )

    fan_in, fan_out = compute_fans(tensor)

    return fan_in if mode == "fan_in" else fan_out


def kaiming_uniform_(
    tensor: paddle.Tensor,
    a: float = 0,
    mode: str = "fan_in",
    nonlinearity: str = "leaky_relu",
) -> paddle.Tensor:
    """Fill the input tensor in place using the Kaiming uniform method.

    Args:
        tensor (Tensor): Paddle Tensor.
        a (float, optional): The negative slope of the rectifier used after this layer.
            Defaults to 0.
        mode (str, optional): Mode to compute the fan. Choose from ["fan_in", "fan_out"].
            When set to 'fan_in', the fan_in of the Tensor is used for initialization;
            when set to 'fan_out', the out_features of the trainable Tensor is used.
            Default is 'fan_in'.
        nonlinearity (str, optional): Nonlinearity method name. Defaults to "leaky_relu".

    Returns:
        Tensor: The initialized tensor.
    """
    fan = _calculate_correct_fan(tensor, mode)
    gain = calculate_gain(nonlinearity, a)
    std = gain / math.sqrt(fan)
    k = math.sqrt(3.0) * std
    return _no_grad_uniform_(tensor, -k, k)
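
For context, a minimal usage sketch of the new function (assuming this PR's paddle.nn.init module is importable):

import paddle

w = paddle.empty([64, 128])
paddle.nn.init.kaiming_uniform_(w, a=0, mode="fan_in", nonlinearity="leaky_relu")
# fan_in = 64 and gain = sqrt(2), so std = sqrt(2)/8 and the bound is
# sqrt(3) * std, i.e. values fall within roughly (-0.306, 0.306)
print(float(w.min()), float(w.max()))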
74 changes: 39 additions & 35 deletions python/paddle/nn/initializer/initializer.py
@@ -109,44 +109,45 @@ def _check_block(self, block: paddle.pir.Block | None) -> paddle.pir.Block:

        return block

-    def _compute_fans(self, var: paddle.Tensor) -> tuple[int, int]:
-        """Compute the fan_in and the fan_out for layers
-        ... (method body unchanged; moved to the module-level function below)

+def compute_fans(var: paddle.Tensor) -> tuple[int, int]:
+    """Compute the fan_in and the fan_out for layers
+
+    This method computes the fan_in and the fan_out
+    for neural network layers, if not specified. It is
+    not possible to perfectly estimate fan_in and fan_out.
+    This method will estimate it correctly for matrix multiply and
+    convolutions.
+
+    Args:
+        var: variable for which fan_in and fan_out have to be computed.
+
+    Returns:
+        tuple of two integers (fan_in, fan_out).
+    """
+    shape = (
+        var._local_shape
+        if (isinstance(var, EagerParamBase) and var.is_dist())
+        else var.shape
+    )
+    if not shape or len(shape) == 0:
+        fan_in = fan_out = 1
+    elif len(shape) == 1:
+        fan_in = fan_out = shape[0]
+    elif len(shape) == 2:
+        # This is the case for simple matrix multiply
+        fan_in = shape[0]
+        fan_out = shape[1]
+    else:
+        # Assume this to be a convolutional kernel
+        # In PaddlePaddle, the shape of the kernel is like:
+        # [num_filters, num_filter_channels, ...] where the remaining
+        # dimensions are the filter_size
+        receptive_field_size = np.prod(shape[2:])
+        fan_in = shape[1] * receptive_field_size
+        fan_out = shape[0] * receptive_field_size
+
+    return (fan_in, fan_out)
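
As a quick sanity check of the fan computation (a worked example, not part of the diff): for a conv kernel of shape [32, 16, 3, 3], the receptive field size is 3 * 3 = 9, so fan_in = 16 * 9 = 144 and fan_out = 32 * 9 = 288.

import paddle

# Import path assumed from this PR's init.py, which does
# `from .initializer.initializer import calculate_gain, compute_fans`.
from paddle.nn.initializer.initializer import compute_fans

w = paddle.empty([32, 16, 3, 3])  # [num_filters, num_filter_channels, kh, kw]
print(compute_fans(w))  # (144, 288)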


def calculate_gain(
@@ -193,8 +194,11 @@ def calculate_gain(
    'conv2d': 1,
    'conv3d': 1,
    'conv1d_transpose': 1,
    'conv_transpose1d': 1,
    'conv2d_transpose': 1,
    'conv_transpose2d': 1,
    'conv3d_transpose': 1,
    'conv_transpose3d': 1,
    'tanh': 5.0 / 3,
    'relu': math.sqrt(2.0),
    'leaky_relu': math.sqrt(2.0 / (1 + param**2)),
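For reference, these gains combine with the fan exactly as in kaiming_uniform_ above: bound = sqrt(3) * gain / sqrt(fan). A quick check with the public calculate_gain (a sketch, not part of the diff):

import math

import paddle

g = paddle.nn.initializer.calculate_gain('leaky_relu', param=0.0)  # sqrt(2 / (1 + 0**2))
print(g)                                    # 1.4142...
print(math.sqrt(3.0) * g / math.sqrt(144))  # ~0.2041 for fan = 144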
4 changes: 2 additions & 2 deletions python/paddle/nn/initializer/kaiming.py
@@ -27,7 +27,7 @@
    in_dygraph_mode,
    in_pir_mode,
)
-from .initializer import Initializer, calculate_gain
+from .initializer import Initializer, calculate_gain, compute_fans

if TYPE_CHECKING:
    from .initializer import _NonLinearity
@@ -120,7 +120,7 @@ def forward(
            var, (framework.Variable, paddle.pir.core.ParameterMeta)
        )
        assert isinstance(block, (framework.Block, paddle.pir.Block))
-        f_in, f_out = self._compute_fans(var)
+        f_in, f_out = compute_fans(var)

        # If fan_in is passed, use it
        if self._mode == 'fan_in':
4 changes: 2 additions & 2 deletions python/paddle/nn/initializer/xavier.py
@@ -26,7 +26,7 @@
    in_dygraph_mode,
    in_pir_mode,
)
-from .initializer import Initializer
+from .initializer import Initializer, compute_fans

__all__ = []

@@ -109,7 +109,7 @@ def forward(
"xavier_init",
)

f_in, f_out = self._compute_fans(var)
f_in, f_out = compute_fans(var)

# If fan_in and fan_out are passed, use them
fan_in = f_in if self._fan_in is None else self._fan_in
96 changes: 96 additions & 0 deletions test/legacy_test/test_nn_init_function.py
@@ -0,0 +1,96 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math
import random
import unittest

import numpy as np
from scipy import stats

import paddle
from paddle import nn


def get_uniform_min_and_max(weight):
    min_value = np.min(weight)
    max_value = np.max(weight)
    return min_value, max_value


class TestKaimingUniform(unittest.TestCase):
    def _test_kaiming_uniform_common(self, tensor):
        init = paddle.nn.init.kaiming_uniform_
        init(tensor, a=0, mode="fan_in", nonlinearity="leaky_relu")
        init(tensor, a=-0.2, mode="fan_out", nonlinearity="leaky_relu")
        init(tensor, a=0, mode="fan_in", nonlinearity="relu")
        init(tensor, a=0, mode="fan_out", nonlinearity="relu")

    def test_kaiming_uniform_linear(self):
        linear = nn.Linear(40, 20)
        self._test_kaiming_uniform_common(linear.weight)

    def _create_random_nd_tensor(self, dims, size_min, size_max):
        size = [random.randint(size_min, size_max) for _ in range(dims)]
        tensor = paddle.zeros(size)
        return tensor

    def _is_uniform(self, tensor, a, b):
        samples = tensor.view([-1]).tolist()
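        # scipy's "uniform" is parameterized by (loc, scale), i.e. U(loc, loc + scale),
        # so args=(a, b - a) tests the samples against U(a, b).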
        p_value = stats.kstest(samples, "uniform", args=(a, (b - a)))[1]
        return p_value > 0.0001

    def _random_float(self, a, b):
        return (b - a) * random.random() + a

    def test_kaiming_uniform(self):
        for use_a in [True, False]:
            for dims in [2, 4]:
                for mode in ["fan_in", "fan_out"]:
                    input_tensor = self._create_random_nd_tensor(
                        dims, size_min=20, size_max=25
                    )
                    if use_a:
                        a = self._random_float(0.1, 2)
                        paddle.nn.init.kaiming_uniform_(
                            input_tensor, a=a, mode=mode
                        )
                    else:
                        a = 0
                        paddle.nn.init.kaiming_uniform_(input_tensor, mode=mode)

                    if dims == 2:
                        # This is the case for simple matrix multiply
                        fan_in = input_tensor.shape[0]
                        fan_out = input_tensor.shape[1]
                    else:
                        fan_in = input_tensor.shape[1]
                        fan_out = input_tensor.shape[0]

                    if input_tensor.dim() > 2:
                        fan_in *= input_tensor[0, 0].numel()
                        fan_out *= input_tensor[0, 0].numel()

                    if mode == "fan_in":
                        n = fan_in
                    else:
                        n = fan_out

                    expected_std = math.sqrt(2.0 / ((1 + a**2) * n))
                    bounds = expected_std * math.sqrt(3.0)
                    assert self._is_uniform(input_tensor, -bounds, bounds)


if __name__ == '__main__':
    unittest.main()
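
For intuition about the bound this test checks, a worked instance (not part of the diff): with mode="fan_in", a 2-D tensor of shape [40, 20] gives n = 40, so for a = 0 the expected std is sqrt(2/40) and the uniform bound is sqrt(3) times that.

import math

a, n = 0.0, 40
expected_std = math.sqrt(2.0 / ((1 + a**2) * n))  # ~0.2236
bound = expected_std * math.sqrt(3.0)             # ~0.3873
print(expected_std, bound)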