Merged
Changes from 3 commits
Commits
24 commits
8696c3d
add nn.init.kaiming_uniform_
zhiminzhang0830 Aug 7, 2025
87d9313
update kaiming_uniform_
zhiminzhang0830 Aug 11, 2025
5b8a1a5
update unit test for kaiming_uniform_
zhiminzhang0830 Aug 11, 2025
3eaa19b
add nn.init.kaiming_uniform_
zhiminzhang0830 Aug 7, 2025
b8e6b9c
update kaiming_uniform_
zhiminzhang0830 Aug 11, 2025
e791c18
update unit test for kaiming_uniform_
zhiminzhang0830 Aug 11, 2025
e2b2cc9
add xavier_uniform_, kaiming_normal_, uniform_
zhiminzhang0830 Aug 11, 2025
d2d614a
add unit test for xavier_uniform_, kaiming_normal_, uniform_
zhiminzhang0830 Aug 11, 2025
50cfb5c
add xavier_normal_ and its unit test
zhiminzhang0830 Aug 11, 2025
2c08d23
add normal_ and its unit test
zhiminzhang0830 Aug 11, 2025
c4c6917
fix: remove 'block' parameter from init.*() function
zhiminzhang0830 Aug 11, 2025
d31e3d3
fix
zhiminzhang0830 Aug 11, 2025
b5ccf0a
add nn.init.trunc_normal_ and its unit test
zhiminzhang0830 Aug 11, 2025
31cdc8b
add nn.init.constant_, nn.init.ones_, nn.init.zeros_
zhiminzhang0830 Aug 11, 2025
44d9d26
support paddle.pir.Value type
zhiminzhang0830 Aug 12, 2025
5afa04c
add dirac_, eye_, orthogonal_
zhiminzhang0830 Aug 12, 2025
ecb4da0
update unit test for nn.init.*
zhiminzhang0830 Aug 12, 2025
0c8bfd1
update init
zhiminzhang0830 Aug 12, 2025
4d4334f
add paddle.pir.Value
zhiminzhang0830 Aug 12, 2025
08a85c6
update unit test for nn.init.orthogonal_
zhiminzhang0830 Aug 12, 2025
1d4550e
Merge remote-tracking branch 'upstream/develop' into init
zhiminzhang0830 Aug 13, 2025
d330635
fix unit test for nn.init.eye_
zhiminzhang0830 Aug 14, 2025
02555a7
fix: skip unit test on dcu
zhiminzhang0830 Aug 14, 2025
d10e3bc
Merge remote-tracking branch 'upstream/develop' into init
zhiminzhang0830 Aug 14, 2025
3 changes: 3 additions & 0 deletions python/paddle/nn/__init__.py
@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle.nn import init as init
Contributor

We shouldn't need to import init here; accessing paddle.nn.init directly should automatically reach the functions under init.py. That's how torch works, I think.

Contributor Author
@zhiminzhang0830 Aug 11, 2025

from . import functional, initializer, quant, utils # noqa: F401
from .clip import ClipGradByGlobalNorm, ClipGradByNorm, ClipGradByValue
from .decode import BeamSearchDecoder, dynamic_decode
@@ -319,4 +321,5 @@
    'LPPool2D',
    'ZeroPad1D',
    'ZeroPad3D',
    'init',
Contributor

Is this exported here to serve as an API? It doesn't seem like it needs to be exported.

Contributor Author

It's needed because of __all__. Without exporting it, the following usage raises an error:

import paddle
tensor = paddle.zeros([32, 64])
paddle.nn.init.kaiming_uniform_(tensor)
AttributeError: module 'paddle.nn' has no attribute 'init'

]
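
A side note on the __all__ behavior discussed above: a plain "from paddle.nn import init as init" would normally make paddle.nn.init reachable on its own, so the AttributeError suggests paddle.nn prunes or lazily resolves names that are not listed in __all__. A minimal sketch of such a pruning pattern follows (hypothetical package mypkg; the cleanup loop is an assumption about the mechanism, not paddle's actual code):

# mypkg/__init__.py -- hypothetical package mimicking the observed behavior
from . import init as init  # submodule import, analogous to paddle.nn.init

__all__ = [
    'Linear',
    'init',  # removing this entry would make mypkg.init unreachable below
]

# assumed cleanup pass: delete public module-level names not in __all__
for _name in list(globals()):
    if not _name.startswith('_') and _name not in __all__:
        del globals()[_name]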
53 changes: 53 additions & 0 deletions python/paddle/nn/init.py
@@ -0,0 +1,53 @@
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    import paddle

from .initializer.kaiming import KaimingUniform


def kaiming_uniform_(
    tensor: paddle.Tensor,
    a: float = 0,
    mode: str = "fan_in",
    nonlinearity: str = "leaky_relu",
    block: paddle.pir.Block | None = None,
Contributor

I'd suggest not adding the block parameter for now; it is used by the old IR, and it can be added later if it turns out to be needed. It carries some comprehension cost for users.

Contributor Author

OK, it has been removed.

) -> paddle.Tensor | None:
    """Modify tensor in place using the Kaiming uniform method.

    Args:
        tensor (Tensor): Paddle Tensor.
        a (float, optional): The negative slope of the rectifier used after this layer.
            Defaults to 0.
        mode (str, optional): Mode to compute the fan. Choose from ["fan_in", "fan_out"].
            When set to 'fan_in', the fan_in parameter is used for initialization.
            When set to 'fan_out', the out_features of the trainable Tensor will be used.
            Default is 'fan_in'.
        nonlinearity (str, optional): Nonlinearity method name. Defaults to "leaky_relu".
        block (Block|None, optional): The block in which initialization ops
            should be added. Used in static graph only, default None.

    Returns:
        Tensor: Initialized tensor.
    """
    init = KaimingUniform(
        negative_slope=a, nonlinearity=nonlinearity, mode=mode
    )

    return init(tensor, block=block)
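
A quick dynamic-graph usage sketch of the new API as it stands in this revision (mirroring the fp16 test later in this PR; the bound check follows the Kaiming uniform rule bound = gain * sqrt(3 / fan), with gain = sqrt(2 / (1 + a^2)) for leaky_relu):

import math

import paddle

w = paddle.zeros([64, 128])  # the tests treat shape[0] as fan_in for 2-D tensors
paddle.nn.init.kaiming_uniform_(w, a=0.1, mode="fan_in", nonlinearity="leaky_relu")

gain = math.sqrt(2.0 / (1 + 0.1**2))
bound = gain * math.sqrt(3.0 / 64)
# every sample should land in [-bound, bound]
assert float(w.min()) >= -bound and float(w.max()) <= bound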
6 changes: 6 additions & 0 deletions python/paddle/nn/initializer/initializer.py
@@ -39,8 +39,11 @@
"conv2d",
"conv3d",
"conv1d_transpose",
"conv_transpose1d",
"conv2d_transpose",
"conv_transpose2d",
"conv3d_transpose",
"conv_transpose3d",
"tanh",
"relu",
"leaky_relu",
@@ -193,8 +196,11 @@ def calculate_gain(
        'conv2d': 1,
        'conv3d': 1,
        'conv1d_transpose': 1,
        'conv_transpose1d': 1,
        'conv2d_transpose': 1,
        'conv_transpose2d': 1,
        'conv3d_transpose': 1,
        'conv_transpose3d': 1,
        'tanh': 5.0 / 3,
        'relu': math.sqrt(2.0),
        'leaky_relu': math.sqrt(2.0 / (1 + param**2)),
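
These aliases make the torch-style and paddle-style transpose-conv names interchangeable in calculate_gain. A small check, assuming the function is reachable as paddle.nn.initializer.calculate_gain (its re-export into paddle.nn.init is discussed in a review thread further down):

import math

import paddle

# both spellings of the transpose-conv names now resolve to gain 1
assert paddle.nn.initializer.calculate_gain('conv_transpose2d') == 1
assert paddle.nn.initializer.calculate_gain('conv2d_transpose') == 1

# the leaky_relu gain depends on the negative-slope parameter
gain = paddle.nn.initializer.calculate_gain('leaky_relu', 0.2)
assert math.isclose(gain, math.sqrt(2.0 / (1 + 0.2**2)))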
266 changes: 266 additions & 0 deletions test/legacy_test/test_nn_init_function.py
@@ -0,0 +1,266 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math
import random
import unittest

import numpy as np
from scipy import stats

import paddle
from paddle import nn
from paddle.pir.core import ParameterMeta

DELTA = 0.00001


class TestKaimingUniformFunc(unittest.TestCase):
    def _test_kaiming_uniform_common(self, tensor):
        init = paddle.nn.init.kaiming_uniform_
        init(tensor, a=0, mode="fan_in", nonlinearity="leaky_relu")
        init(tensor, a=-0.2, mode="fan_out", nonlinearity="leaky_relu")
        init(tensor, a=0, mode="fan_in", nonlinearity="relu")
        init(tensor, a=0, mode="fan_out", nonlinearity="relu")

    def test_kaiming_uniform_linear(self):
        linear = nn.Linear(40, 20)
        self._test_kaiming_uniform_common(linear.weight)

    def _create_random_nd_tensor(self, dims, size_min, size_max):
        size = [random.randint(size_min, size_max) for _ in range(dims)]
        tensor = paddle.zeros(size)
        return tensor

    def _is_uniform(self, tensor, a, b):
        samples = tensor.view([-1]).tolist()
        p_value = stats.kstest(samples, "uniform", args=(a, (b - a)))[1]
        return p_value > 0.0001

    def _random_float(self, a, b):
        return (b - a) * random.random() + a

    def calculate_gain(self, nonlinearity, param):
        recommended_gain = {
Contributor

Were these values looked up from torch's calculate_gain? Please add them to paddle's calculate_gain as well.

Contributor Author

Paddle also has this function, under nn.initializer.initializer. To match the calling interface of torch's torch.nn.init.calculate_gain, it has also been re-exported in the paddle.nn.init module, so it can be called as paddle.nn.init.calculate_gain.

            'sigmoid': 1,
            'linear': 1,
            'conv1d': 1,
            'conv2d': 1,
            'conv3d': 1,
            'conv1d_transpose': 1,
            'conv_transpose1d': 1,
            'conv2d_transpose': 1,
            'conv_transpose2d': 1,
            'conv3d_transpose': 1,
            'conv_transpose3d': 1,
            'tanh': 5.0 / 3,
            'relu': math.sqrt(2.0),
            'leaky_relu': math.sqrt(2.0 / (1 + param**2)),
            'selu': 3.0 / 4,
        }
        return recommended_gain[nonlinearity]

    def test_kaiming_uniform_nonlinearity(self):
        for nonlinearity in [
            'conv_transpose1d',
            'conv_transpose2d',
            'conv_transpose3d',
            'relu',
            'leaky_relu',
        ]:
            input_tensor = paddle.zeros([1024, 512])
            paddle.nn.init.kaiming_uniform_(
                input_tensor, nonlinearity=nonlinearity
            )

            fan_in = input_tensor.shape[0]

            expected_std = self.calculate_gain(
                nonlinearity=nonlinearity, param=0
            )

            bounds = expected_std * math.sqrt(3.0 / float(fan_in))
            assert self._is_uniform(input_tensor, -bounds, bounds)

    def test_kaiming_uniform(self):
        for use_a in [True, False]:
            for dims in [2, 3, 4]:
                for mode in ["fan_in", "fan_out"]:
                    input_tensor = self._create_random_nd_tensor(
                        dims, size_min=20, size_max=108
                    )
                    if use_a:
                        a = self._random_float(0.1, 2)
                        paddle.nn.init.kaiming_uniform_(
                            input_tensor, a=a, mode=mode
                        )
                    else:
                        a = 0
                        paddle.nn.init.kaiming_uniform_(input_tensor, mode=mode)

                    if dims == 2:
                        # This is the case for simple matrix multiply
                        fan_in = input_tensor.shape[0]
                        fan_out = input_tensor.shape[1]
                    else:
                        fan_in = input_tensor.shape[1]
                        fan_out = input_tensor.shape[0]

                    if input_tensor.dim() > 2:
                        fan_in *= input_tensor[0, 0].numel()
                        fan_out *= input_tensor[0, 0].numel()

                    if mode == "fan_in":
                        n = fan_in
                    else:
                        n = fan_out
                    expected_std = self.calculate_gain(
                        nonlinearity='leaky_relu', param=a
                    )
                    bounds = expected_std * math.sqrt(3.0 / float(n))
                    assert self._is_uniform(input_tensor, -bounds, bounds)

    @unittest.skipIf(
        not paddle.is_compiled_with_cuda(), "core is not compiled with CUDA"
    )
    def test_kaiming_uniform_fp16(self):
        input_tensor = paddle.zeros([1024, 512], dtype='float16')
        paddle.nn.init.kaiming_uniform_(input_tensor)
        fan_in = input_tensor.shape[0]

        expected_std = self.calculate_gain(nonlinearity='leaky_relu', param=0)

        bounds = expected_std * math.sqrt(3.0 / float(fan_in))
        assert self._is_uniform(input_tensor, -bounds, bounds)
        assert input_tensor.dtype == paddle.float16


class TestKaimingUniformFuncPir(unittest.TestCase):
    def setUp(self):
        self.init_uniform_op_name = 'pd_op.uniform'

    def get_operand_definition_op_attrs(self, cur_op, operand_name, attr_name):
        input_names = cur_op.get_input_names()
        self.assertIn(operand_name, input_names)
        attr = (
            cur_op.operand(input_names.index(operand_name))
            .source()
            .get_defining_op()
            .attrs()[attr_name]
        )
        return attr

    def get_init_ops_by_op_name(self, block, op_name):
        checked_ops = []
        for op in block.ops:
            # get init op
            if op_name == op.name():
                checked_ops.append(op)
        return checked_ops

    def test_kaiming_uniform_(self):
        with paddle.pir_utils.IrGuard():
            main = paddle.static.Program()
            with paddle.static.program_guard(main, paddle.static.Program()):
                parameter_meta = ParameterMeta([1024, 512], paddle.float32)
                init_result = paddle.nn.init.kaiming_uniform_(
                    parameter_meta, block=main.global_block()
                )
                block = main.global_block()
                checked_ops = self.get_init_ops_by_op_name(
                    block, self.init_uniform_op_name
                )
                self.assertEqual(len(checked_ops), 1)
                init_op = checked_ops[0]
                limit = np.sqrt(6.0 / init_result.shape[0])

                min = self.get_operand_definition_op_attrs(
                    init_op, "min", "value"
                )
                max = self.get_operand_definition_op_attrs(
                    init_op, "max", "value"
                )
                self.assertAlmostEqual(min, -limit, delta=DELTA)
                self.assertAlmostEqual(max, limit, delta=DELTA)
                self.assertEqual(init_op.attrs()['seed'], 0)

    def test_kaiming_uniform_conv(self):
        with paddle.pir_utils.IrGuard():
            main = paddle.static.Program()
            with paddle.static.program_guard(main, paddle.static.Program()):
                parameter_meta = ParameterMeta([5, 10, 15, 20], paddle.float32)
                init_result = paddle.nn.init.kaiming_uniform_(
                    parameter_meta, block=main.global_block()
                )
                block = main.global_block()
                checked_ops = self.get_init_ops_by_op_name(
                    block, self.init_uniform_op_name
                )
                self.assertEqual(len(checked_ops), 1)
                init_op = checked_ops[0]
                limit = np.sqrt(
                    6.0
                    / (
                        init_result.shape[1]
                        * init_result.shape[2]
                        * init_result.shape[3]
                    )
                )

                min = self.get_operand_definition_op_attrs(
                    init_op, "min", "value"
                )
                max = self.get_operand_definition_op_attrs(
                    init_op, "max", "value"
                )
                self.assertAlmostEqual(min, -limit, delta=DELTA)
                self.assertAlmostEqual(max, limit, delta=DELTA)
                self.assertEqual(init_op.attrs()['seed'], 0)

    def test_kaiming_uniform_fan_out(self):
        with paddle.pir_utils.IrGuard():
            main = paddle.static.Program()
            with paddle.static.program_guard(main, paddle.static.Program()):
                parameter_meta = ParameterMeta([5, 10, 15, 20], paddle.float32)
                init_result = paddle.nn.init.kaiming_uniform_(
                    parameter_meta, mode='fan_out', block=main.global_block()
                )
                block = main.global_block()
                checked_ops = self.get_init_ops_by_op_name(
                    block, self.init_uniform_op_name
                )
                self.assertEqual(len(checked_ops), 1)
                init_op = checked_ops[0]
                limit = np.sqrt(
                    6.0
                    / (
                        init_result.shape[0]
                        * init_result.shape[2]
                        * init_result.shape[3]
                    )
                )

                min = self.get_operand_definition_op_attrs(
                    init_op, "min", "value"
                )
                max = self.get_operand_definition_op_attrs(
                    init_op, "max", "value"
                )
                self.assertAlmostEqual(min, -limit, delta=DELTA)
                self.assertAlmostEqual(max, limit, delta=DELTA)
                self.assertEqual(init_op.attrs()['seed'], 0)


if __name__ == '__main__':
    unittest.main()
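
A note on the verification strategy above: the dynamic-graph tests never compare against golden values; they run a Kolmogorov-Smirnov test checking that the initialized samples look uniform on [-bound, bound], while the PIR tests instead inspect the min/max attributes of the generated pd_op.uniform op. A standalone sketch of the KS check (shapes and threshold taken from the tests):

import math

from scipy import stats

import paddle

tensor = paddle.zeros([1024, 512])
paddle.nn.init.kaiming_uniform_(tensor)  # default leaky_relu, a=0 -> gain sqrt(2)
bound = math.sqrt(2.0) * math.sqrt(3.0 / 1024)  # fan_in = 1024

# scipy parameterizes 'uniform' as U(loc, loc + scale)
p_value = stats.kstest(tensor.flatten().tolist(), "uniform",
                       args=(-bound, 2 * bound))[1]
assert p_value > 1e-4  # reject only on gross mismatch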