
Commit 1b2fcfb

Merge branch 'develop' into rm-fluid-dy
2 parents c35c58a + 49656af commit 1b2fcfb

16 files changed: +33 -994 lines changed

python/paddle/fluid/dygraph/nn.py

Lines changed: 0 additions & 328 deletions
@@ -53,10 +53,8 @@
     'Pool2D',
     'Linear',
     'BatchNorm',
-    'Dropout',
     'Embedding',
     'GRUUnit',
-    'LayerNorm',
     'NCE',
     'PRelu',
     'BilinearTensorProduct',
@@ -1180,124 +1178,6 @@ def forward(self, input):
         return self._helper.append_activation(batch_norm_out, self._act)


-class Dropout(layers.Layer):
-    """
-    This interface is used to construct a callable object of the ``Dropout`` class.
-    For more details, refer to code examples.
-
-    Drop or keep each element of input independently. Dropout is a regularization
-    technique for reducing overfitting by preventing neuron co-adaption during
-    training. The dropout operator randomly sets (according to the given dropout
-    probability) the outputs of some units to zero, while others are remain
-    unchanged.
-
-    Dropout layer can be removed for efficiency concern.
-
-    Parameters:
-        p (float, optional): Probability of setting units to zero. Default: 0.5
-        seed (int, optional): A Python integer used to create random seeds. If this
-            parameter is set to None, a random seed is used.
-            NOTE: If an integer seed is given, always the same output
-            units will be dropped. DO NOT use a fixed seed in training. Default: None.
-        dropout_implementation(string, optional): ['downgrade_in_infer'(default)|'upscale_in_train']
-
-            1. downgrade_in_infer(default), downgrade the outcome at inference
-
-               - train: out = input * mask
-               - inference: out = input * (1.0 - p)
-
-               (mask is a tensor same shape with input, value is 0 or 1
-               ratio of 0 is dropout_prob)
-            2. upscale_in_train, upscale the outcome at training time
-
-               - train: out = input * mask / ( 1.0 - p )
-               - inference: out = input
-
-               (mask is a tensor same shape with input, value is 0 or 1
-               ratio of 0 is p)
-        is_test (bool, optional): A flag indicating whether it is in test phrase or not.
-            This flag only has effect on static graph mode. For dygraph mode, please use ``eval()``.
-            Default: False.
-
-    Returns:
-        None
-
-    Examples:
-
-        .. code-block:: python
-
-            import paddle.fluid as fluid
-            from paddle.fluid.dygraph.base import to_variable
-            import numpy as np
-
-            x = np.random.random(size=(3, 10, 3, 7)).astype('float32')
-            with fluid.dygraph.guard():
-                x = to_variable(x)
-                m = fluid.dygraph.Dropout(p=0.5)
-                droped_train = m(x)
-                # switch to eval mode
-                m.eval()
-                droped_eval = m(x)
-    """
-
-    def __init__(
-        self,
-        p=0.5,
-        seed=None,
-        dropout_implementation="downgrade_in_infer",
-        is_test=False,
-    ):
-        super().__init__()
-        assert isinstance(p, (float, int)), "p argument should be a number"
-        assert 0 <= p <= 1, "p argument should between 0 and 1"
-        self._dropout_prob = p
-        assert seed is None or isinstance(
-            seed, int
-        ), "seed argument should be None or a integer"
-        self._seed = seed
-        assert dropout_implementation in (
-            'downgrade_in_infer',
-            'upscale_in_train',
-        ), "dropout_implementation argument should be 'downgrade_in_infer' or 'upscale_in_train'"
-        self._dropout_implementation = dropout_implementation
-        self._is_test = is_test
-
-    def forward(self, input):
-        # fast return for p == 0
-        if self._dropout_prob == 0:
-            return input
-        prog = default_main_program()
-        if (self._seed is None or self._seed == 0) and prog.random_seed != 0:
-            self._seed = prog.random_seed
-        attrs = {
-            'dropout_prob': self._dropout_prob,
-            'is_test': not self.training
-            if _non_static_mode()
-            else self._is_test,
-            'fix_seed': self._seed is not None,
-            'seed': self._seed if self._seed is not None else 0,
-            'dropout_implementation': self._dropout_implementation,
-        }
-
-        if _non_static_mode():
-            attrs = sum(attrs.items(), ())
-            out, mask = _legacy_C_ops.dropout(input, *attrs)
-            return out
-
-        out = self._helper.create_variable_for_type_inference(dtype=input.dtype)
-        mask = self._helper.create_variable_for_type_inference(
-            dtype=core.VarDesc.VarType.UINT8, stop_gradient=True
-        )
-
-        self._helper.append_op(
-            type='dropout',
-            inputs={'X': [input]},
-            outputs={'Out': [out], 'Mask': [mask]},
-            attrs=attrs,
-        )
-        return out
-
-
 class Embedding(layers.Layer):
     r"""
     :alias_main: paddle.nn.Embedding
@@ -1479,214 +1359,6 @@ def forward(self, input):
         return out


-class LayerNorm(layers.Layer):
-    r"""
-    :alias_main: paddle.nn.LayerNorm
-    :alias: paddle.nn.LayerNorm,paddle.nn.layer.LayerNorm,paddle.nn.layer.norm.LayerNorm
-    :old_api: paddle.fluid.dygraph.LayerNorm
-
-    This interface is used to construct a callable object of the ``LayerNorm`` class.
-    For more details, refer to code examples.
-    It implements the function of the Layer Normalization Layer and can be applied to mini-batch input data.
-    Refer to `Layer Normalization <https://arxiv.org/pdf/1607.06450v1.pdf>`_
-
-    The formula is as follows:
-
-    .. math::
-
-        \\mu & = \\frac{1}{H}\\sum_{i=1}^{H} x_i
-
-        \\sigma & = \\sqrt{\\frac{1}{H}\sum_{i=1}^{H}{(x_i - \\mu)^2} + \\epsilon}
-
-        y & = f(\\frac{g}{\\sigma}(x - \\mu) + b)
-
-    - :math:`x`: the vector representation of the summed inputs to the neurons in that layer.
-    - :math:`H`: the number of hidden units in a layers
-    - :math:`\\epsilon`: the small value added to the variance to prevent division by zero.
-    - :math:`g`: the trainable scale parameter.
-    - :math:`b`: the trainable bias parameter.
-
-    Parameters:
-        normalized_shape(int or list or tuple): Input shape from an expected input of
-            size :math:`[*, normalized_shape[0], normalized_shape[1], ..., normalized_shape[-1]]`.
-            If it is a single integer, this module will normalize over the last dimension
-            which is expected to be of that specific size.
-        scale(bool, optional): Whether to learn the adaptive gain :math:`g` after
-            normalization. Default: True.
-        shift(bool, optional): Whether to learn the adaptive bias :math:`b` after
-            normalization. Default: True.
-        epsilon(float, optional): The small value added to the variance to prevent
-            division by zero. Default: 1e-05.
-        param_attr(ParamAttr, optional): The parameter attribute for the learnable
-            gain :math:`g`. If :attr:`scale` is False, :attr:`param_attr` is
-            omitted. If :attr:`scale` is True and :attr:`param_attr` is None,
-            a default :code:`ParamAttr` would be added as scale. The
-            :attr:`param_attr` is initialized as 1 if it is added. Default: None.
-        bias_attr(ParamAttr, optional): The parameter attribute for the learnable
-            bias :math:`b`. If :attr:`shift` is False, :attr:`bias_attr` is
-            omitted. If :attr:`shift` is True and :attr:`param_attr` is None,
-            a default :code:`ParamAttr` would be added as bias. The
-            :attr:`bias_attr` is initialized as 0 if it is added. Default: None.
-        act(str, optional): Activation to be applied to the output of layer normalization.
-            Default: None.
-        dtype (str, optional): Data type, it can be "float32" or "float64". Default: "float32".
-
-    Returns:
-        None
-
-    Examples:
-
-        .. code-block:: python
-
-            import paddle.fluid as fluid
-            from paddle.fluid.dygraph.base import to_variable
-            import numpy
-
-            x = numpy.random.random((3, 32, 32)).astype('float32')
-            with fluid.dygraph.guard():
-                x = to_variable(x)
-                layerNorm = fluid.LayerNorm([32, 32])
-                ret = layerNorm(x)
-
-    """
-
-    def __init__(
-        self,
-        normalized_shape,
-        scale=True,
-        shift=True,
-        epsilon=1e-05,
-        param_attr=None,
-        bias_attr=None,
-        act=None,
-        dtype='float32',
-    ):
-        super().__init__()
-        if isinstance(normalized_shape, numbers.Integral):
-            normalized_shape = [normalized_shape]
-
-        self._normalized_shape = list(normalized_shape)
-        self._scale = scale
-        self._shift = shift
-        self._epsilon = epsilon
-        self._param_attr = param_attr
-        self._bias_attr = bias_attr
-        self._act = act
-        self._dtype = dtype
-        param_shape = [np.prod(self._normalized_shape)]
-        if self._scale:
-            self.weight = self.create_parameter(
-                attr=self._param_attr,
-                shape=param_shape,
-                dtype=self._dtype,
-                default_initializer=Constant(1.0),
-            )
-        else:
-            if self._param_attr:
-                logging.warn("param_attr are only available with scale is True")
-            self.weight = None
-
-        if self._shift:
-            assert self._bias_attr is not False
-            self.bias = self.create_parameter(
-                attr=self._bias_attr,
-                shape=param_shape,
-                dtype=self._dtype,
-                is_bias=True,
-            )
-        else:
-            if self._bias_attr:
-                logging.warn("bias_attr are only available with shift is True")
-            self.bias = None
-
-    def forward(self, input):
-        input_shape = list(input.shape)
-        input_ndim = len(input_shape)
-        normalized_ndim = len(self._normalized_shape)
-        self._begin_norm_axis = input_ndim - normalized_ndim
-        if (
-            input_ndim < normalized_ndim
-            or input_shape[self._begin_norm_axis :] != self._normalized_shape
-        ):
-            str_normalized_shape = str(self._normalized_shape)
-            raise ValueError(
-                'Given normalized_shape is '
-                + str_normalized_shape
-                + ', expected input with shape [*, '
-                + str_normalized_shape[1:]
-                + ', but got input shape '
-                + str(input_shape)
-            )
-
-        if _non_static_mode():
-            if in_dygraph_mode():
-                pre_act, _, _, = _C_ops.layer_norm(
-                    input,
-                    self.weight,
-                    self.bias,
-                    self._epsilon,
-                    self._begin_norm_axis,
-                )
-                return dygraph_utils._append_activation_in_dygraph(
-                    pre_act, act=self._act
-                )
-            else:
-                pre_act, _, _ = _legacy_C_ops.layer_norm(
-                    input,
-                    self.weight,
-                    self.bias,
-                    'epsilon',
-                    self._epsilon,
-                    'begin_norm_axis',
-                    self._begin_norm_axis,
-                )
-                return dygraph_utils._append_activation_in_dygraph(
-                    pre_act, act=self._act
-                )
-
-        check_variable_and_dtype(
-            input, 'input', ['float32', 'float64'], 'LayerNorm'
-        )
-
-        inputs = dict()
-        inputs['X'] = [input]
-        if self._scale:
-            inputs['Scale'] = [self.weight]
-        if self._shift:
-            inputs['Bias'] = [self.bias]
-        attrs = {
-            "epsilon": self._epsilon,
-            "begin_norm_axis": self._begin_norm_axis,
-        }
-
-        # create output
-        mean_out = self._helper.create_variable_for_type_inference(
-            dtype=self._dtype, stop_gradient=True
-        )
-        variance_out = self._helper.create_variable_for_type_inference(
-            dtype=self._dtype, stop_gradient=True
-        )
-        layer_norm_out = self._helper.create_variable_for_type_inference(
-            self._dtype
-        )
-
-        self._helper.append_op(
-            type="layer_norm",
-            inputs=inputs,
-            outputs={
-                "Y": layer_norm_out,
-                "Mean": mean_out,
-                "Variance": variance_out,
-            },
-            attrs={
-                "epsilon": self._epsilon,
-                "begin_norm_axis": self._begin_norm_axis,
-            },
-        )
-
-        return self._helper.append_activation(layer_norm_out, act=self._act)
-
-
 class GRUUnit(layers.Layer):
     """
     **GRU unit layer**
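Again as a reading aid (not part of the diff), the formula in the removed LayerNorm docstring, with mu and sigma computed over the trailing normalized_shape dimensions and y = g / sigma * (x - mu) + b, can be sketched in NumPy. The name layer_norm_reference is hypothetical; the flat g and b parameters mirror the param_shape = [np.prod(self._normalized_shape)] layout used in the removed code.

import numpy as np

def layer_norm_reference(x, normalized_shape, g=None, b=None, epsilon=1e-05):
    """Illustrative NumPy version of the removed LayerNorm docstring formula
    (hypothetical helper): y = g / sigma * (x - mu) + b."""
    begin_norm_axis = x.ndim - len(normalized_shape)
    axes = tuple(range(begin_norm_axis, x.ndim))
    mu = x.mean(axis=axes, keepdims=True)
    sigma = np.sqrt(((x - mu) ** 2).mean(axis=axes, keepdims=True) + epsilon)
    # flat parameters (shape [prod(normalized_shape)]) are reshaped for broadcasting
    g = 1.0 if g is None else g.reshape(x.shape[begin_norm_axis:])
    b = 0.0 if b is None else b.reshape(x.shape[begin_norm_axis:])
    return g / sigma * (x - mu) + b

x = np.random.random((3, 32, 32)).astype('float32')
y = layer_norm_reference(x, normalized_shape=[32, 32])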
