53 | 53 | 'Pool2D', |
54 | 54 | 'Linear', |
55 | 55 | 'BatchNorm', |
56 | | - 'Dropout', |
57 | 56 | 'Embedding', |
58 | 57 | 'GRUUnit', |
59 | | - 'LayerNorm', |
60 | 58 | 'NCE', |
61 | 59 | 'PRelu', |
62 | 60 | 'BilinearTensorProduct', |
@@ -1180,124 +1178,6 @@ def forward(self, input): |
1180 | 1178 | return self._helper.append_activation(batch_norm_out, self._act) |
1181 | 1179 |
1182 | 1180 |
1183 | | -class Dropout(layers.Layer): |
1184 | | - """ |
1185 | | - This interface is used to construct a callable object of the ``Dropout`` class. |
1186 | | - For more details, refer to code examples. |
1187 | | -
1188 | | - Drop or keep each element of input independently. Dropout is a regularization |
1189 | | - technique for reducing overfitting by preventing neuron co-adaptation during |
1190 | | - training. The dropout operator randomly sets (according to the given dropout |
1191 | | - probability) the outputs of some units to zero, while others remain |
1192 | | - unchanged. |
1193 | | -
1194 | | - The Dropout layer can be removed for efficiency concerns. |
1195 | | -
1196 | | - Parameters: |
1197 | | - p (float, optional): Probability of setting units to zero. Default: 0.5 |
1198 | | - seed (int, optional): A Python integer used to create random seeds. If this |
1199 | | - parameter is set to None, a random seed is used. |
1200 | | - NOTE: If an integer seed is given, the same output |
1201 | | - units will always be dropped. DO NOT use a fixed seed in training. Default: None. |
1202 | | - dropout_implementation(string, optional): ['downgrade_in_infer'(default)|'upscale_in_train'] |
1203 | | -
1204 | | - 1. downgrade_in_infer (default): downgrade the outcome at inference time |
1205 | | -
1206 | | - - train: out = input * mask |
1207 | | - - inference: out = input * (1.0 - p) |
1208 | | -
1209 | | - (mask is a tensor with the same shape as the input; values are 0 or 1, |
1210 | | - and the ratio of 0s is p) |
1211 | | - 2. upscale_in_train: upscale the outcome at training time |
1212 | | -
1213 | | - - train: out = input * mask / ( 1.0 - p ) |
1214 | | - - inference: out = input |
1215 | | -
1216 | | - (mask is a tensor with the same shape as the input; values are 0 or 1, |
1217 | | - and the ratio of 0s is p) |
1218 | | - is_test (bool, optional): A flag indicating whether it is in the test phase or not. |
1219 | | - This flag only takes effect in static graph mode. For dygraph mode, please use ``eval()``. |
1220 | | - Default: False. |
1221 | | -
1222 | | - Returns: |
1223 | | - None |
1224 | | -
1225 | | - Examples: |
1226 | | -
1227 | | - .. code-block:: python |
1228 | | -
1229 | | - import paddle.fluid as fluid |
1230 | | - from paddle.fluid.dygraph.base import to_variable |
1231 | | - import numpy as np |
1232 | | -
1233 | | - x = np.random.random(size=(3, 10, 3, 7)).astype('float32') |
1234 | | - with fluid.dygraph.guard(): |
1235 | | - x = to_variable(x) |
1236 | | - m = fluid.dygraph.Dropout(p=0.5) |
1237 | | - droped_train = m(x) |
1238 | | - # switch to eval mode |
1239 | | - m.eval() |
1240 | | - droped_eval = m(x) |
1241 | | - """ |
1242 | | - |
1243 | | - def __init__( |
1244 | | - self, |
1245 | | - p=0.5, |
1246 | | - seed=None, |
1247 | | - dropout_implementation="downgrade_in_infer", |
1248 | | - is_test=False, |
1249 | | - ): |
1250 | | - super().__init__() |
1251 | | - assert isinstance(p, (float, int)), "p argument should be a number" |
1252 | | - assert 0 <= p <= 1, "p argument should be between 0 and 1" |
1253 | | - self._dropout_prob = p |
1254 | | - assert seed is None or isinstance( |
1255 | | - seed, int |
1256 | | - ), "seed argument should be None or a integer" |
1257 | | - self._seed = seed |
1258 | | - assert dropout_implementation in ( |
1259 | | - 'downgrade_in_infer', |
1260 | | - 'upscale_in_train', |
1261 | | - ), "dropout_implementation argument should be 'downgrade_in_infer' or 'upscale_in_train'" |
1262 | | - self._dropout_implementation = dropout_implementation |
1263 | | - self._is_test = is_test |
1264 | | - |
1265 | | - def forward(self, input): |
1266 | | - # fast return for p == 0 |
1267 | | - if self._dropout_prob == 0: |
1268 | | - return input |
1269 | | - prog = default_main_program() |
1270 | | - if (self._seed is None or self._seed == 0) and prog.random_seed != 0: |
1271 | | - self._seed = prog.random_seed |
1272 | | - attrs = { |
1273 | | - 'dropout_prob': self._dropout_prob, |
1274 | | - 'is_test': not self.training |
1275 | | - if _non_static_mode() |
1276 | | - else self._is_test, |
1277 | | - 'fix_seed': self._seed is not None, |
1278 | | - 'seed': self._seed if self._seed is not None else 0, |
1279 | | - 'dropout_implementation': self._dropout_implementation, |
1280 | | - } |
1281 | | - |
1282 | | - if _non_static_mode(): |
1283 | | - attrs = sum(attrs.items(), ()) |
1284 | | - out, mask = _legacy_C_ops.dropout(input, *attrs) |
1285 | | - return out |
1286 | | - |
1287 | | - out = self._helper.create_variable_for_type_inference(dtype=input.dtype) |
1288 | | - mask = self._helper.create_variable_for_type_inference( |
1289 | | - dtype=core.VarDesc.VarType.UINT8, stop_gradient=True |
1290 | | - ) |
1291 | | - |
1292 | | - self._helper.append_op( |
1293 | | - type='dropout', |
1294 | | - inputs={'X': [input]}, |
1295 | | - outputs={'Out': [out], 'Mask': [mask]}, |
1296 | | - attrs=attrs, |
1297 | | - ) |
1298 | | - return out |
1299 | | - |
1300 | | - |
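
The two `dropout_implementation` schemes described in the removed docstring differ only in where the `1/(1 - p)` rescaling happens. The following is a minimal NumPy sketch of that math for reference; `dropout_reference` is an illustrative helper, not Paddle API, and the maintained layer in current Paddle is `paddle.nn.Dropout`, which uses the `upscale_in_train` behavior by default.

    import numpy as np

    # Illustrative reference for the two dropout scaling schemes above.
    # `p` is the drop probability; `mask` has the same shape as `x`,
    # with zeros at the dropped positions (ratio of zeros is roughly p).
    def dropout_reference(x, p, training, mode="downgrade_in_infer", rng=None):
        rng = rng or np.random.default_rng()
        if not training:
            # downgrade_in_infer scales the output at inference time;
            # upscale_in_train leaves the input untouched.
            return x * (1.0 - p) if mode == "downgrade_in_infer" else x
        mask = (rng.random(x.shape) >= p).astype(x.dtype)
        if mode == "downgrade_in_infer":
            return x * mask                # train: out = input * mask
        return x * mask / (1.0 - p)        # train: out = input * mask / (1 - p)

    x = np.ones((3, 10, 3, 7), dtype="float32")
    print(dropout_reference(x, 0.5, training=True).mean())   # roughly 0.5
    print(dropout_reference(x, 0.5, training=False).mean())  # exactly 0.5
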
1301 | 1181 | class Embedding(layers.Layer): |
1302 | 1182 | r""" |
1303 | 1183 | :alias_main: paddle.nn.Embedding |
@@ -1479,214 +1359,6 @@ def forward(self, input): |
1479 | 1359 | return out |
1480 | 1360 |
1481 | 1361 |
1482 | | -class LayerNorm(layers.Layer): |
1483 | | - r""" |
1484 | | - :alias_main: paddle.nn.LayerNorm |
1485 | | - :alias: paddle.nn.LayerNorm,paddle.nn.layer.LayerNorm,paddle.nn.layer.norm.LayerNorm |
1486 | | - :old_api: paddle.fluid.dygraph.LayerNorm |
1487 | | -
1488 | | - This interface is used to construct a callable object of the ``LayerNorm`` class. |
1489 | | - For more details, refer to code examples. |
1490 | | - It implements the function of the Layer Normalization Layer and can be applied to mini-batch input data. |
1491 | | - Refer to `Layer Normalization <https://arxiv.org/pdf/1607.06450v1.pdf>`_ |
1492 | | -
1493 | | - The formula is as follows: |
1494 | | -
1495 | | - .. math:: |
1496 | | -
1497 | | - \\mu & = \\frac{1}{H}\\sum_{i=1}^{H} x_i |
1498 | | -
1499 | | - \\sigma & = \\sqrt{\\frac{1}{H}\\sum_{i=1}^{H}{(x_i - \\mu)^2} + \\epsilon} |
1500 | | -
1501 | | - y & = f(\\frac{g}{\\sigma}(x - \\mu) + b) |
1502 | | -
1503 | | - - :math:`x`: the vector representation of the summed inputs to the neurons in that layer. |
1504 | | - - :math:`H`: the number of hidden units in a layer |
1505 | | - - :math:`\\epsilon`: the small value added to the variance to prevent division by zero. |
1506 | | - - :math:`g`: the trainable scale parameter. |
1507 | | - - :math:`b`: the trainable bias parameter. |
1508 | | -
1509 | | - Parameters: |
1510 | | - normalized_shape(int or list or tuple): Input shape from an expected input of |
1511 | | - size :math:`[*, normalized_shape[0], normalized_shape[1], ..., normalized_shape[-1]]`. |
1512 | | - If it is a single integer, this module will normalize over the last dimension |
1513 | | - which is expected to be of that specific size. |
1514 | | - scale(bool, optional): Whether to learn the adaptive gain :math:`g` after |
1515 | | - normalization. Default: True. |
1516 | | - shift(bool, optional): Whether to learn the adaptive bias :math:`b` after |
1517 | | - normalization. Default: True. |
1518 | | - epsilon(float, optional): The small value added to the variance to prevent |
1519 | | - division by zero. Default: 1e-05. |
1520 | | - param_attr(ParamAttr, optional): The parameter attribute for the learnable |
1521 | | - gain :math:`g`. If :attr:`scale` is False, :attr:`param_attr` is |
1522 | | - omitted. If :attr:`scale` is True and :attr:`param_attr` is None, |
1523 | | - a default :code:`ParamAttr` would be added as scale. The |
1524 | | - :attr:`param_attr` is initialized as 1 if it is added. Default: None. |
1525 | | - bias_attr(ParamAttr, optional): The parameter attribute for the learnable |
1526 | | - bias :math:`b`. If :attr:`shift` is False, :attr:`bias_attr` is |
1527 | | - omitted. If :attr:`shift` is True and :attr:`bias_attr` is None, |
1528 | | - a default :code:`ParamAttr` would be added as bias. The |
1529 | | - :attr:`bias_attr` is initialized as 0 if it is added. Default: None. |
1530 | | - act(str, optional): Activation to be applied to the output of layer normalization. |
1531 | | - Default: None. |
1532 | | - dtype (str, optional): Data type, it can be "float32" or "float64". Default: "float32". |
1533 | | -
1534 | | - Returns: |
1535 | | - None |
1536 | | -
1537 | | - Examples: |
1538 | | -
1539 | | - .. code-block:: python |
1540 | | -
1541 | | - import paddle.fluid as fluid |
1542 | | - from paddle.fluid.dygraph.base import to_variable |
1543 | | - import numpy |
1544 | | -
1545 | | - x = numpy.random.random((3, 32, 32)).astype('float32') |
1546 | | - with fluid.dygraph.guard(): |
1547 | | - x = to_variable(x) |
1548 | | - layerNorm = fluid.LayerNorm([32, 32]) |
1549 | | - ret = layerNorm(x) |
1550 | | -
1551 | | - """ |
1552 | | - |
1553 | | - def __init__( |
1554 | | - self, |
1555 | | - normalized_shape, |
1556 | | - scale=True, |
1557 | | - shift=True, |
1558 | | - epsilon=1e-05, |
1559 | | - param_attr=None, |
1560 | | - bias_attr=None, |
1561 | | - act=None, |
1562 | | - dtype='float32', |
1563 | | - ): |
1564 | | - super().__init__() |
1565 | | - if isinstance(normalized_shape, numbers.Integral): |
1566 | | - normalized_shape = [normalized_shape] |
1567 | | - |
1568 | | - self._normalized_shape = list(normalized_shape) |
1569 | | - self._scale = scale |
1570 | | - self._shift = shift |
1571 | | - self._epsilon = epsilon |
1572 | | - self._param_attr = param_attr |
1573 | | - self._bias_attr = bias_attr |
1574 | | - self._act = act |
1575 | | - self._dtype = dtype |
1576 | | - param_shape = [np.prod(self._normalized_shape)] |
1577 | | - if self._scale: |
1578 | | - self.weight = self.create_parameter( |
1579 | | - attr=self._param_attr, |
1580 | | - shape=param_shape, |
1581 | | - dtype=self._dtype, |
1582 | | - default_initializer=Constant(1.0), |
1583 | | - ) |
1584 | | - else: |
1585 | | - if self._param_attr: |
1586 | | - logging.warn("param_attr is only available when scale is True") |
1587 | | - self.weight = None |
1588 | | - |
1589 | | - if self._shift: |
1590 | | - assert self._bias_attr is not False |
1591 | | - self.bias = self.create_parameter( |
1592 | | - attr=self._bias_attr, |
1593 | | - shape=param_shape, |
1594 | | - dtype=self._dtype, |
1595 | | - is_bias=True, |
1596 | | - ) |
1597 | | - else: |
1598 | | - if self._bias_attr: |
1599 | | - logging.warn("bias_attr is only available when shift is True") |
1600 | | - self.bias = None |
1601 | | - |
1602 | | - def forward(self, input): |
1603 | | - input_shape = list(input.shape) |
1604 | | - input_ndim = len(input_shape) |
1605 | | - normalized_ndim = len(self._normalized_shape) |
1606 | | - self._begin_norm_axis = input_ndim - normalized_ndim |
1607 | | - if ( |
1608 | | - input_ndim < normalized_ndim |
1609 | | - or input_shape[self._begin_norm_axis :] != self._normalized_shape |
1610 | | - ): |
1611 | | - str_normalized_shape = str(self._normalized_shape) |
1612 | | - raise ValueError( |
1613 | | - 'Given normalized_shape is ' |
1614 | | - + str_normalized_shape |
1615 | | - + ', expected input with shape [*, ' |
1616 | | - + str_normalized_shape[1:] |
1617 | | - + ', but got input shape ' |
1618 | | - + str(input_shape) |
1619 | | - ) |
1620 | | - |
1621 | | - if _non_static_mode(): |
1622 | | - if in_dygraph_mode(): |
1623 | | - pre_act, _, _ = _C_ops.layer_norm( |
1624 | | - input, |
1625 | | - self.weight, |
1626 | | - self.bias, |
1627 | | - self._epsilon, |
1628 | | - self._begin_norm_axis, |
1629 | | - ) |
1630 | | - return dygraph_utils._append_activation_in_dygraph( |
1631 | | - pre_act, act=self._act |
1632 | | - ) |
1633 | | - else: |
1634 | | - pre_act, _, _ = _legacy_C_ops.layer_norm( |
1635 | | - input, |
1636 | | - self.weight, |
1637 | | - self.bias, |
1638 | | - 'epsilon', |
1639 | | - self._epsilon, |
1640 | | - 'begin_norm_axis', |
1641 | | - self._begin_norm_axis, |
1642 | | - ) |
1643 | | - return dygraph_utils._append_activation_in_dygraph( |
1644 | | - pre_act, act=self._act |
1645 | | - ) |
1646 | | - |
1647 | | - check_variable_and_dtype( |
1648 | | - input, 'input', ['float32', 'float64'], 'LayerNorm' |
1649 | | - ) |
1650 | | - |
1651 | | - inputs = dict() |
1652 | | - inputs['X'] = [input] |
1653 | | - if self._scale: |
1654 | | - inputs['Scale'] = [self.weight] |
1655 | | - if self._shift: |
1656 | | - inputs['Bias'] = [self.bias] |
1657 | | - attrs = { |
1658 | | - "epsilon": self._epsilon, |
1659 | | - "begin_norm_axis": self._begin_norm_axis, |
1660 | | - } |
1661 | | - |
1662 | | - # create output |
1663 | | - mean_out = self._helper.create_variable_for_type_inference( |
1664 | | - dtype=self._dtype, stop_gradient=True |
1665 | | - ) |
1666 | | - variance_out = self._helper.create_variable_for_type_inference( |
1667 | | - dtype=self._dtype, stop_gradient=True |
1668 | | - ) |
1669 | | - layer_norm_out = self._helper.create_variable_for_type_inference( |
1670 | | - self._dtype |
1671 | | - ) |
1672 | | - |
1673 | | - self._helper.append_op( |
1674 | | - type="layer_norm", |
1675 | | - inputs=inputs, |
1676 | | - outputs={ |
1677 | | - "Y": layer_norm_out, |
1678 | | - "Mean": mean_out, |
1679 | | - "Variance": variance_out, |
1680 | | - }, |
1681 | | - attrs={ |
1682 | | - "epsilon": self._epsilon, |
1683 | | - "begin_norm_axis": self._begin_norm_axis, |
1684 | | - }, |
1685 | | - ) |
1686 | | - |
1687 | | - return self._helper.append_activation(layer_norm_out, act=self._act) |
1688 | | - |
1689 | | - |
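
For reference, the formula in the removed `LayerNorm` docstring (mean and variance taken over the trailing `normalized_shape` dims, then the learned gain `g` and bias `b` applied; the optional activation is omitted here) can be written out directly. This is a minimal NumPy sketch with an illustrative helper name, not Paddle API; the maintained equivalent is `paddle.nn.LayerNorm`.

    import numpy as np

    # Illustrative reference for the layer_norm math in the removed docstring:
    # normalize over the trailing `normalized_shape` dims, then apply the
    # optional learned gain `g` and bias `b`.
    def layer_norm_reference(x, normalized_shape, g=None, b=None, epsilon=1e-5):
        axes = tuple(range(x.ndim - len(normalized_shape), x.ndim))
        mu = x.mean(axis=axes, keepdims=True)
        sigma = np.sqrt(((x - mu) ** 2).mean(axis=axes, keepdims=True) + epsilon)
        y = (x - mu) / sigma
        if g is not None:
            y = y * g.reshape(normalized_shape)
        if b is not None:
            y = y + b.reshape(normalized_shape)
        return y

    x = np.random.random((3, 32, 32)).astype("float32")
    out = layer_norm_reference(x, [32, 32])
    print(out.mean(), out.std())  # approximately 0 and 1
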
1690 | 1362 | class GRUUnit(layers.Layer): |
1691 | 1363 | """ |
1692 | 1364 | **GRU unit layer** |