Skip to content

Commit 66d1c6c

Browse files
authored
Adding the Xavier Initializer (#5270)
* Adding the Xavier Initializer
* Addressing code review feedback
1 parent 27ce729 commit 66d1c6c

2 files changed

Lines changed: 237 additions & 1 deletion

File tree

python/paddle/v2/framework/initializer.py

Lines changed: 130 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
import paddle.v2.framework.framework as framework
2+
import numpy as np
23

3-
__all__ = ['ConstantInitializer', 'UniformInitializer']
4+
__all__ = [
5+
'ConstantInitializer', 'UniformInitializer', 'NormalInitializer',
6+
'XavierInitializer'
7+
]
48

59

610
class Initializer(object):
@@ -20,6 +24,41 @@ def __call__(self, param, block):
2024
"""
2125
raise NotImplementedError()
2226

27+
def _compute_fans(self, var):
    """Infer (fan_in, fan_out) for a variable from its shape.

    The fans cannot be recovered perfectly for every layer type; this
    estimate is exact for matrix multiplies and convolution kernels and
    falls back to symmetric fans for scalars and vectors.

    Args:
        var: variable for which fan_in and fan_out have to be computed

    Returns:
        tuple of two integers (fan_in, fan_out)
    """
    shape = var.shape
    rank = len(shape) if shape else 0
    if rank == 0:
        # Scalar (or shapeless) variable: both fans default to 1.
        fan_in = fan_out = 1
    elif rank == 1:
        # Bias-like vector: fan_in and fan_out are both its length.
        fan_in = fan_out = shape[0]
    elif rank == 2:
        # Simple matrix multiply: rows feed in, columns fan out.
        fan_in, fan_out = shape
    else:
        # Assume a convolutional kernel. In PaddlePaddle the kernel is
        # laid out as [num_filters, num_filter_channels, *filter_size],
        # so the trailing dimensions form the receptive field.
        receptive_field_size = np.prod(shape[2:])
        fan_in = shape[1] * receptive_field_size
        fan_out = shape[0] * receptive_field_size
    return (fan_in, fan_out)
61+
2362

2463
class ConstantInitializer(Initializer):
2564
"""Implements the constant initializer
@@ -156,3 +195,93 @@ def __call__(self, var, block):
156195
})
157196
var.op = op
158197
return op
198+
199+
200+
class XavierInitializer(Initializer):
    """Implements the Xavier initializer

    This class implements the Xavier weight initializer from the paper
    Understanding the difficulty of training deep feedforward neural
    networks[1] by Xavier Glorot and Yoshua Bengio.

    This initializer is designed to keep the scale of the gradients
    approximately same in all the layers. In case of Uniform distribution,
    the range is [-x, x], where x = sqrt(6 / (fan_in + fan_out)).
    In case of Normal distribution, the mean is 0 and the standard deviation
    is sqrt(2/ (fan_in + fan_out)).

    References:
        [1] Understanding the difficulty of training deep feedforward neural
            networks. International conference on artificial intelligence and
            statistics.
            (http://proceedings.mlr.press/v9/glorot10a.html)
    """

    def __init__(self, uniform=True, fan_in=None, fan_out=None, seed=0):
        """Constructor for XavierInitializer

        Args:
            uniform: whether to use uniform or normal distribution
            fan_in: fan_in for Xavier initialization. If None, it is
                    inferred from the variable.
            fan_out: fan_out for Xavier initialization. If None, it is
                     inferred from the variable.
            seed: random seed

        Note: It is recommended to set fan_in and fan_out to None for
              most cases.
        """
        # uniform and seed must always be supplied; the fans may be None
        # and are then inferred per-variable in __call__.
        assert uniform is not None
        assert seed is not None
        super(XavierInitializer, self).__init__()
        self._uniform = uniform
        self._seed = seed
        self._fan_in = fan_in
        self._fan_out = fan_out

    def __call__(self, var, block):
        """Add xavier initialization ops for a variable

        Args:
            var: Variable that needs to be initialized
            block: The block in which initialization ops
                   should be added

        Returns:
            the initialization op
        """
        assert isinstance(var, framework.Variable)
        assert isinstance(block, framework.Block)
        inferred_in, inferred_out = self._compute_fans(var)

        # Explicitly supplied fans take precedence over inferred ones.
        fan_in = self._fan_in if self._fan_in is not None else inferred_in
        fan_out = self._fan_out if self._fan_out is not None else inferred_out
        fan_sum = float(fan_in + fan_out)

        if self._uniform:
            # Uniform Xavier: sample from [-limit, limit].
            limit = np.sqrt(6.0 / fan_sum)
            op_type = "uniform_random"
            dist_attrs = {"min": -limit, "max": limit}
        else:
            # Gaussian Xavier: zero mean, std = sqrt(2 / (fan_in + fan_out)).
            op_type = "gaussian_random"
            dist_attrs = {"mean": 0.0, "std": np.sqrt(2.0 / fan_sum)}

        attrs = {
            "shape": var.shape,
            "data_type": int(var.data_type),
            "seed": self._seed
        }
        attrs.update(dist_attrs)

        # Prepend so the initialization runs before any op using the var.
        op = block.prepend_op(type=op_type, outputs={"Out": var}, attrs=attrs)
        var.op = op
        return op

python/paddle/v2/framework/tests/test_initializer.py

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import numpy as np
12
import unittest
23

34
import paddle.v2.framework.framework as framework
@@ -116,5 +117,111 @@ def test_normal_initializer(self):
116117
self.assertEqual(init_op.attr('seed'), 123)
117118

118119

120+
class TestXavierInitializer(unittest.TestCase):
    def _build_param(self, shape, **init_kwargs):
        """Create a fresh program holding one parameter initialized with
        XavierInitializer(**init_kwargs); return (param, global block)."""
        program = framework.Program()
        block = program.global_block()
        param = block.create_parameter(
            dtype="float32",
            shape=shape,
            lod_level=0,
            name="param",
            initializer=initializer.XavierInitializer(**init_kwargs))
        return param, block

    def test_uniform_xavier_initializer(self):
        """Test Xavier initializer with uniform distribution on
           for matrix multiply.
        """
        param, block = self._build_param([5, 10])
        self.assertEqual(len(block.ops), 1)
        init_op = block.ops[0]
        self.assertEqual(init_op.type, 'uniform_random')
        # limit = sqrt(6 / (fan_in + fan_out)) for a 2-D weight.
        limit = np.sqrt(6.0 / (param.shape[0] + param.shape[1]))
        self.assertAlmostEqual(init_op.attr('min'), -limit, delta=DELTA)
        self.assertAlmostEqual(init_op.attr('max'), limit, delta=DELTA)
        self.assertEqual(init_op.attr('seed'), 0)

    def test_uniform_xavier_initializer_conv(self):
        """Test Xavier initializer with uniform distribution on
           for convolutions.
        """
        param, block = self._build_param([5, 10, 15, 20])
        self.assertEqual(len(block.ops), 1)
        init_op = block.ops[0]
        self.assertEqual(init_op.type, 'uniform_random')
        # For a conv kernel the fans are scaled by the receptive field.
        receptive_field_size = float(15 * 20)
        limit = np.sqrt(6.0 / (
            (param.shape[0] + param.shape[1]) * receptive_field_size))
        self.assertAlmostEqual(init_op.attr('min'), -limit, delta=DELTA)
        self.assertAlmostEqual(init_op.attr('max'), limit, delta=DELTA)
        self.assertEqual(init_op.attr('seed'), 0)

    def test_normal_xavier_initializer(self):
        """Test Xavier initializer with normal distribution on
           for matrix multiply.
        """
        param, block = self._build_param([5, 10], uniform=False)
        self.assertEqual(len(block.ops), 1)
        init_op = block.ops[0]
        self.assertEqual(init_op.type, 'gaussian_random')
        # std = sqrt(2 / (fan_in + fan_out)) for a 2-D weight.
        std = np.sqrt(2.0 / (param.shape[0] + param.shape[1]))
        self.assertAlmostEqual(init_op.attr('mean'), 0.0, delta=DELTA)
        self.assertAlmostEqual(init_op.attr('std'), std, delta=DELTA)
        self.assertEqual(init_op.attr('seed'), 0)

    def test_normal_xavier_initializer_conv(self):
        """Test Xavier initializer with normal distribution on
           for convolutions.
        """
        param, block = self._build_param([5, 10, 15, 20], uniform=False)
        self.assertEqual(len(block.ops), 1)
        init_op = block.ops[0]
        self.assertEqual(init_op.type, 'gaussian_random')
        receptive_field_size = float(15 * 20)
        std = np.sqrt(2.0 / (
            (param.shape[0] + param.shape[1]) * receptive_field_size))
        self.assertAlmostEqual(init_op.attr('mean'), 0.0, delta=DELTA)
        self.assertAlmostEqual(init_op.attr('std'), std, delta=DELTA)
        self.assertEqual(init_op.attr('seed'), 0)

    def test_xavier_initializer_supplied_arguments(self):
        """Test the Xavier initializer with supplied arguments
        """
        _, block = self._build_param(
            [5, 10], fan_in=12, fan_out=23, seed=134)
        self.assertEqual(len(block.ops), 1)
        init_op = block.ops[0]
        self.assertEqual(init_op.type, 'uniform_random')
        # Supplied fans override the ones inferred from the shape.
        limit = np.sqrt(6.0 / (12 + 23))
        self.assertAlmostEqual(init_op.attr('min'), -limit, delta=DELTA)
        self.assertAlmostEqual(init_op.attr('max'), limit, delta=DELTA)
        self.assertEqual(init_op.attr('seed'), 134)
224+
225+
119226
if __name__ == '__main__':
120227
unittest.main()

0 commit comments

Comments
 (0)