53 | 53 | 'Pool2D', |
54 | 54 | 'Linear', |
55 | 55 | 'BatchNorm', |
56 | | - 'Dropout', |
57 | 56 | 'Embedding', |
58 | 57 | 'GRUUnit', |
59 | | - 'LayerNorm', |
60 | 58 | 'NCE', |
61 | 59 | 'PRelu', |
62 | 60 | 'BilinearTensorProduct', |
@@ -1180,124 +1178,6 @@ def forward(self, input): |
1180 | 1178 | return self._helper.append_activation(batch_norm_out, self._act) |
1181 | 1179 |
1182 | 1180 |
1183 | | -class Dropout(layers.Layer): |
1184 | | - """ |
1185 | | - This interface is used to construct a callable object of the ``Dropout`` class. |
1186 | | - For more details, refer to code examples. |
1187 | | -
1188 | | - Drop or keep each element of input independently. Dropout is a regularization |
1189 | | - technique for reducing overfitting by preventing neuron co-adaptation during |
1190 | | - training. The dropout operator randomly sets (according to the given dropout |
1191 | | - probability) the outputs of some units to zero, while others remain |
1192 | | - unchanged. |
1193 | | -
1194 | | - The Dropout layer can be removed for efficiency concerns. |
1195 | | -
1196 | | - Parameters: |
1197 | | - p (float, optional): Probability of setting units to zero. Default: 0.5 |
1198 | | - seed (int, optional): A Python integer used to create random seeds. If this |
1199 | | - parameter is set to None, a random seed is used. |
1200 | | - NOTE: If an integer seed is given, the same output |
1201 | | - units will always be dropped. DO NOT use a fixed seed in training. Default: None. |
1202 | | - dropout_implementation(string, optional): ['downgrade_in_infer'(default)|'upscale_in_train'] |
1203 | | -
1204 | | - 1. downgrade_in_infer (default): downgrade the outcome at inference time |
1205 | | -
1206 | | - - train: out = input * mask |
1207 | | - - inference: out = input * (1.0 - p) |
1208 | | -
1209 | | - (mask is a tensor with the same shape as the input; values are 0 or 1, |
1210 | | - and the ratio of 0s is p) |
1211 | | - 2. upscale_in_train: upscale the outcome at training time |
1212 | | -
1213 | | - - train: out = input * mask / ( 1.0 - p ) |
1214 | | - - inference: out = input |
1215 | | -
1216 | | - (mask is a tensor with the same shape as the input; values are 0 or 1, |
1217 | | - and the ratio of 0s is p) |
1218 | | - is_test (bool, optional): A flag indicating whether it is in the test phase or not. |
1219 | | - This flag only takes effect in static graph mode. For dygraph mode, please use ``eval()``. |
1220 | | - Default: False. |
1221 | | -
1222 | | - Returns: |
1223 | | - None |
1224 | | -
1225 | | - Examples: |
1226 | | -
1227 | | - .. code-block:: python |
1228 | | -
1229 | | - import paddle.fluid as fluid |
1230 | | - from paddle.fluid.dygraph.base import to_variable |
1231 | | - import numpy as np |
1232 | | -
1233 | | - x = np.random.random(size=(3, 10, 3, 7)).astype('float32') |
1234 | | - with fluid.dygraph.guard(): |
1235 | | - x = to_variable(x) |
1236 | | - m = fluid.dygraph.Dropout(p=0.5) |
1237 | | - droped_train = m(x) |
1238 | | - # switch to eval mode |
1239 | | - m.eval() |
1240 | | - droped_eval = m(x) |
1241 | | - """ |
1242 | | - |
1243 | | - def __init__( |
1244 | | - self, |
1245 | | - p=0.5, |
1246 | | - seed=None, |
1247 | | - dropout_implementation="downgrade_in_infer", |
1248 | | - is_test=False, |
1249 | | - ): |
1250 | | - super().__init__() |
1251 | | - assert isinstance(p, (float, int)), "p argument should be a number" |
1252 | | - assert 0 <= p <= 1, "p argument should be between 0 and 1" |
1253 | | - self._dropout_prob = p |
1254 | | - assert seed is None or isinstance( |
1255 | | - seed, int |
1256 | | - ), "seed argument should be None or a integer" |
1257 | | - self._seed = seed |
1258 | | - assert dropout_implementation in ( |
1259 | | - 'downgrade_in_infer', |
1260 | | - 'upscale_in_train', |
1261 | | - ), "dropout_implementation argument should be 'downgrade_in_infer' or 'upscale_in_train'" |
1262 | | - self._dropout_implementation = dropout_implementation |
1263 | | - self._is_test = is_test |
1264 | | - |
1265 | | - def forward(self, input): |
1266 | | - # fast return for p == 0 |
1267 | | - if self._dropout_prob == 0: |
1268 | | - return input |
1269 | | - prog = default_main_program() |
1270 | | - if (self._seed is None or self._seed == 0) and prog.random_seed != 0: |
1271 | | - self._seed = prog.random_seed |
1272 | | - attrs = { |
1273 | | - 'dropout_prob': self._dropout_prob, |
1274 | | - 'is_test': not self.training |
1275 | | - if _non_static_mode() |
1276 | | - else self._is_test, |
1277 | | - 'fix_seed': self._seed is not None, |
1278 | | - 'seed': self._seed if self._seed is not None else 0, |
1279 | | - 'dropout_implementation': self._dropout_implementation, |
1280 | | - } |
1281 | | - |
1282 | | - if _non_static_mode(): |
1283 | | - attrs = sum(attrs.items(), ()) |
1284 | | - out, mask = _legacy_C_ops.dropout(input, *attrs) |
1285 | | - return out |
1286 | | - |
1287 | | - out = self._helper.create_variable_for_type_inference(dtype=input.dtype) |
1288 | | - mask = self._helper.create_variable_for_type_inference( |
1289 | | - dtype=core.VarDesc.VarType.UINT8, stop_gradient=True |
1290 | | - ) |
1291 | | - |
1292 | | - self._helper.append_op( |
1293 | | - type='dropout', |
1294 | | - inputs={'X': [input]}, |
1295 | | - outputs={'Out': [out], 'Mask': [mask]}, |
1296 | | - attrs=attrs, |
1297 | | - ) |
1298 | | - return out |
1299 | | - |
1300 | | - |
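
The two `dropout_implementation` schemes described in the removed docstring differ only in where the `1/(1 - p)` rescaling happens. The following is a minimal NumPy sketch of that math for reference; `dropout_reference` is an illustrative helper, not Paddle API, and the maintained layer in current Paddle is `paddle.nn.Dropout`, which uses the `upscale_in_train` behavior by default.

    import numpy as np

    # Illustrative reference for the two dropout scaling schemes above.
    # `p` is the drop probability; `mask` has the same shape as `x`,
    # with zeros at the dropped positions (ratio of zeros is roughly p).
    def dropout_reference(x, p, training, mode="downgrade_in_infer", rng=None):
        rng = rng or np.random.default_rng()
        if not training:
            # downgrade_in_infer scales the output at inference time;
            # upscale_in_train leaves the input untouched.
            return x * (1.0 - p) if mode == "downgrade_in_infer" else x
        mask = (rng.random(x.shape) >= p).astype(x.dtype)
        if mode == "downgrade_in_infer":
            return x * mask                # train: out = input * mask
        return x * mask / (1.0 - p)        # train: out = input * mask / (1 - p)

    x = np.ones((3, 10, 3, 7), dtype="float32")
    print(dropout_reference(x, 0.5, training=True).mean())   # roughly 0.5
    print(dropout_reference(x, 0.5, training=False).mean())  # exactly 0.5
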
1301 | 1181 | class Embedding(layers.Layer): |
1302 | 1182 | r""" |
1303 | 1183 | :alias_main: paddle.nn.Embedding |
@@ -1479,214 +1359,6 @@ def forward(self, input): |
1479 | 1359 | return out |
1480 | 1360 |
1481 | 1361 |
1482 | | -class LayerNorm(layers.Layer): |
1483 | | - r""" |
1484 | | - :alias_main: paddle.nn.LayerNorm |
1485 | | - :alias: paddle.nn.LayerNorm,paddle.nn.layer.LayerNorm,paddle.nn.layer.norm.LayerNorm |
1486 | | - :old_api: paddle.fluid.dygraph.LayerNorm |
1487 | | -
1488 | | - This interface is used to construct a callable object of the ``LayerNorm`` class. |
1489 | | - For more details, refer to code examples. |
1490 | | - It implements the function of the Layer Normalization Layer and can be applied to mini-batch input data. |
1491 | | - Refer to `Layer Normalization <https://arxiv.org/pdf/1607.06450v1.pdf>`_ |
1492 | | -
1493 | | - The formula is as follows: |
1494 | | -
1495 | | - .. math:: |
1496 | | -
1497 | | - \\mu & = \\frac{1}{H}\\sum_{i=1}^{H} x_i |
1498 | | -
1499 | | - \\sigma & = \\sqrt{\\frac{1}{H}\\sum_{i=1}^{H}{(x_i - \\mu)^2} + \\epsilon} |
1500 | | -
1501 | | - y & = f(\\frac{g}{\\sigma}(x - \\mu) + b) |
1502 | | -
1503 | | - - :math:`x`: the vector representation of the summed inputs to the neurons in that layer. |
1504 | | - - :math:`H`: the number of hidden units in a layer |
1505 | | - - :math:`\\epsilon`: the small value added to the variance to prevent division by zero. |
1506 | | - - :math:`g`: the trainable scale parameter. |
1507 | | - - :math:`b`: the trainable bias parameter. |
1508 | | -
1509 | | - Parameters: |
1510 | | - normalized_shape(int or list or tuple): Input shape from an expected input of |
1511 | | - size :math:`[*, normalized_shape[0], normalized_shape[1], ..., normalized_shape[-1]]`. |
1512 | | - If it is a single integer, this module will normalize over the last dimension |
1513 | | - which is expected to be of that specific size. |
1514 | | - scale(bool, optional): Whether to learn the adaptive gain :math:`g` after |
1515 | | - normalization. Default: True. |
1516 | | - shift(bool, optional): Whether to learn the adaptive bias :math:`b` after |
1517 | | - normalization. Default: True. |
1518 | | - epsilon(float, optional): The small value added to the variance to prevent |
1519 | | - division by zero. Default: 1e-05. |
1520 | | - param_attr(ParamAttr, optional): The parameter attribute for the learnable |
1521 | | - gain :math:`g`. If :attr:`scale` is False, :attr:`param_attr` is |
1522 | | - omitted. If :attr:`scale` is True and :attr:`param_attr` is None, |
1523 | | - a default :code:`ParamAttr` would be added as scale. The |
1524 | | - :attr:`param_attr` is initialized as 1 if it is added. Default: None. |
1525 | | - bias_attr(ParamAttr, optional): The parameter attribute for the learnable |
1526 | | - bias :math:`b`. If :attr:`shift` is False, :attr:`bias_attr` is |
1527 | | - omitted. If :attr:`shift` is True and :attr:`bias_attr` is None, |
1528 | | - a default :code:`ParamAttr` would be added as bias. The |
1529 | | - :attr:`bias_attr` is initialized as 0 if it is added. Default: None. |
1530 | | - act(str, optional): Activation to be applied to the output of layer normalization. |
1531 | | - Default: None. |
1532 | | - dtype (str, optional): Data type, it can be "float32" or "float64". Default: "float32". |
1533 | | -
1534 | | - Returns: |
1535 | | - None |
1536 | | -
1537 | | - Examples: |
1538 | | -
1539 | | - .. code-block:: python |
1540 | | -
1541 | | - import paddle.fluid as fluid |
1542 | | - from paddle.fluid.dygraph.base import to_variable |
1543 | | - import numpy |
1544 | | -
1545 | | - x = numpy.random.random((3, 32, 32)).astype('float32') |
1546 | | - with fluid.dygraph.guard(): |
1547 | | - x = to_variable(x) |
1548 | | - layerNorm = fluid.LayerNorm([32, 32]) |
1549 | | - ret = layerNorm(x) |
1550 | | -
1551 | | - """ |
1552 | | - |
1553 | | - def __init__( |
1554 | | - self, |
1555 | | - normalized_shape, |
1556 | | - scale=True, |
1557 | | - shift=True, |
1558 | | - epsilon=1e-05, |
1559 | | - param_attr=None, |
1560 | | - bias_attr=None, |
1561 | | - act=None, |
1562 | | - dtype='float32', |
1563 | | - ): |
1564 | | - super().__init__() |
1565 | | - if isinstance(normalized_shape, numbers.Integral): |
1566 | | - normalized_shape = [normalized_shape] |
1567 | | - |
1568 | | - self._normalized_shape = list(normalized_shape) |
1569 | | - self._scale = scale |
1570 | | - self._shift = shift |
1571 | | - self._epsilon = epsilon |
1572 | | - self._param_attr = param_attr |
1573 | | - self._bias_attr = bias_attr |
1574 | | - self._act = act |
1575 | | - self._dtype = dtype |
1576 | | - param_shape = [np.prod(self._normalized_shape)] |
1577 | | - if self._scale: |
1578 | | - self.weight = self.create_parameter( |
1579 | | - attr=self._param_attr, |
1580 | | - shape=param_shape, |
1581 | | - dtype=self._dtype, |
1582 | | - default_initializer=Constant(1.0), |
1583 | | - ) |
1584 | | - else: |
1585 | | - if self._param_attr: |
1586 | | - logging.warn("param_attr is only available when scale is True") |
1587 | | - self.weight = None |
1588 | | - |
1589 | | - if self._shift: |
1590 | | - assert self._bias_attr is not False |
1591 | | - self.bias = self.create_parameter( |
1592 | | - attr=self._bias_attr, |
1593 | | - shape=param_shape, |
1594 | | - dtype=self._dtype, |
1595 | | - is_bias=True, |
1596 | | - ) |
1597 | | - else: |
1598 | | - if self._bias_attr: |
1599 | | - logging.warn("bias_attr is only available when shift is True") |
1600 | | - self.bias = None |
1601 | | - |
1602 | | - def forward(self, input): |
1603 | | - input_shape = list(input.shape) |
1604 | | - input_ndim = len(input_shape) |
1605 | | - normalized_ndim = len(self._normalized_shape) |
1606 | | - self._begin_norm_axis = input_ndim - normalized_ndim |
1607 | | - if ( |
1608 | | - input_ndim < normalized_ndim |
1609 | | - or input_shape[self._begin_norm_axis :] != self._normalized_shape |
1610 | | - ): |
1611 | | - str_normalized_shape = str(self._normalized_shape) |
1612 | | - raise ValueError( |
1613 | | - 'Given normalized_shape is ' |
1614 | | - + str_normalized_shape |
1615 | | - + ', expected input with shape [*, ' |
1616 | | - + str_normalized_shape[1:] |
1617 | | - + ', but got input shape ' |
1618 | | - + str(input_shape) |
1619 | | - ) |
1620 | | - |
1621 | | - if _non_static_mode(): |
1622 | | - if in_dygraph_mode(): |
1623 | | - pre_act, _, _ = _C_ops.layer_norm( |
1624 | | - input, |
1625 | | - self.weight, |
1626 | | - self.bias, |
1627 | | - self._epsilon, |
1628 | | - self._begin_norm_axis, |
1629 | | - ) |
1630 | | - return dygraph_utils._append_activation_in_dygraph( |
1631 | | - pre_act, act=self._act |
1632 | | - ) |
1633 | | - else: |
1634 | | - pre_act, _, _ = _legacy_C_ops.layer_norm( |
1635 | | - input, |
1636 | | - self.weight, |
1637 | | - self.bias, |
1638 | | - 'epsilon', |
1639 | | - self._epsilon, |
1640 | | - 'begin_norm_axis', |
1641 | | - self._begin_norm_axis, |
1642 | | - ) |
1643 | | - return dygraph_utils._append_activation_in_dygraph( |
1644 | | - pre_act, act=self._act |
1645 | | - ) |
1646 | | - |
1647 | | - check_variable_and_dtype( |
1648 | | - input, 'input', ['float32', 'float64'], 'LayerNorm' |
1649 | | - ) |
1650 | | - |
1651 | | - inputs = dict() |
1652 | | - inputs['X'] = [input] |
1653 | | - if self._scale: |
1654 | | - inputs['Scale'] = [self.weight] |
1655 | | - if self._shift: |
1656 | | - inputs['Bias'] = [self.bias] |
1657 | | - attrs = { |
1658 | | - "epsilon": self._epsilon, |
1659 | | - "begin_norm_axis": self._begin_norm_axis, |
1660 | | - } |
1661 | | - |
1662 | | - # create output |
1663 | | - mean_out = self._helper.create_variable_for_type_inference( |
1664 | | - dtype=self._dtype, stop_gradient=True |
1665 | | - ) |
1666 | | - variance_out = self._helper.create_variable_for_type_inference( |
1667 | | - dtype=self._dtype, stop_gradient=True |
1668 | | - ) |
1669 | | - layer_norm_out = self._helper.create_variable_for_type_inference( |
1670 | | - self._dtype |
1671 | | - ) |
1672 | | - |
1673 | | - self._helper.append_op( |
1674 | | - type="layer_norm", |
1675 | | - inputs=inputs, |
1676 | | - outputs={ |
1677 | | - "Y": layer_norm_out, |
1678 | | - "Mean": mean_out, |
1679 | | - "Variance": variance_out, |
1680 | | - }, |
1681 | | - attrs={ |
1682 | | - "epsilon": self._epsilon, |
1683 | | - "begin_norm_axis": self._begin_norm_axis, |
1684 | | - }, |
1685 | | - ) |
1686 | | - |
1687 | | - return self._helper.append_activation(layer_norm_out, act=self._act) |
1688 | | - |
1689 | | - |
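
For reference, the formula in the removed `LayerNorm` docstring (mean and variance taken over the trailing `normalized_shape` dims, then the learned gain `g` and bias `b` applied; the optional activation is omitted here) can be written out directly. This is a minimal NumPy sketch with an illustrative helper name, not Paddle API; the maintained equivalent is `paddle.nn.LayerNorm`.

    import numpy as np

    # Illustrative reference for the layer_norm math in the removed docstring:
    # normalize over the trailing `normalized_shape` dims, then apply the
    # optional learned gain `g` and bias `b`.
    def layer_norm_reference(x, normalized_shape, g=None, b=None, epsilon=1e-5):
        axes = tuple(range(x.ndim - len(normalized_shape), x.ndim))
        mu = x.mean(axis=axes, keepdims=True)
        sigma = np.sqrt(((x - mu) ** 2).mean(axis=axes, keepdims=True) + epsilon)
        y = (x - mu) / sigma
        if g is not None:
            y = y * g.reshape(normalized_shape)
        if b is not None:
            y = y + b.reshape(normalized_shape)
        return y

    x = np.random.random((3, 32, 32)).astype("float32")
    out = layer_norm_reference(x, [32, 32])
    print(out.mean(), out.std())  # approximately 0 and 1
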
1690 | 1362 | class GRUUnit(layers.Layer): |
1691 | 1363 | """ |
1692 | 1364 | **GRU unit layer** |