@@ -153,10 +153,6 @@ def FusedFFN(self):
     def test_fused_ffn(self):
         base_out, base_grad = self.Base()
         fused_out, fused_grad = self.FusedFFN()
-        if base_grad is None:
-            print("base grad is none")
-        if fused_grad is None:
-            print("fused grad is none")

         np.testing.assert_allclose(
             base_out.numpy(), fused_out.numpy(), rtol=self.rtol, atol=self.atol)
@@ -205,5 +201,80 @@ def getShape(self):
         self.dim_feedforward = 8


+class TestFusedFFNOpApi(TestFusedFFNOp):
+    def setUp(self):
+        self.getDtype()
+        self.getShape()
+        self.getDiff()
+        self.getActivation()
+        self.getNormalizeBefore()
+        self.weight_attr = None
+        self.bias_attr = None
+
+        self.weight_attrs = fused_transformer._convert_param_attr_to_list(
+            self.weight_attr, 2)
+        self.bias_attrs = fused_transformer._convert_param_attr_to_list(
+            self.bias_attr, 2)
+        self.ffn_layer = fused_transformer.FusedFeedForward(
+            self.d_model, self.dim_feedforward, 0.0, self.act_method, 0.0,
+            self.normalize_before, self.weight_attrs[1], self.bias_attrs[1])
+
+        self.ln1_scale = self.ffn_layer._ln1_scale
+        self.ln1_bias = self.ffn_layer._ln1_bias
+        self.ln2_scale = self.ffn_layer._ln2_scale
+        self.ln2_bias = self.ffn_layer._ln2_bias
+        self.linear1_weight = self.ffn_layer._linear1_weight
+        self.linear1_bias = self.ffn_layer._linear1_bias
+        self.linear2_weight = self.ffn_layer._linear2_weight
+        self.linear2_bias = self.ffn_layer._linear2_bias
+
+        self.src = np.random.random((self.batch_size, self.query_length,
+                                     self.d_model)).astype(self.dtype)
+        self.dout = np.random.random((self.batch_size, self.query_length,
+                                      self.d_model)).astype(self.dtype)
+
+        self.dropout1 = Dropout(0.0, mode="upscale_in_train")
+        self.dropout2 = Dropout(0.0, mode="upscale_in_train")
+        self.activation = getattr(F, self.act_method)
+
+    def Base(self):
+        tensor_src = paddle.to_tensor(self.src, stop_gradient=False)
+        residual = paddle.to_tensor(self.src)
+        if self.normalize_before:
+            ln1_out = F.layer_norm(tensor_src,
+                                   list([self.d_model]), self.ln1_scale,
+                                   self.ln1_bias)
+            linear1_out = F.linear(ln1_out, self.linear1_weight,
+                                   self.linear1_bias)
+            act_out = self.activation(linear1_out)
+            dropout1_out = self.dropout1(act_out)
+            linear2_out = F.linear(dropout1_out, self.linear2_weight,
+                                   self.linear2_bias)
+            dropout2_out = residual + self.dropout2(linear2_out)
+            paddle.autograd.backward([dropout2_out],
+                                     [paddle.to_tensor(self.dout)], True)
+            return dropout2_out, tensor_src.grad
+        else:
+            linear1_out = F.linear(tensor_src, self.linear1_weight,
+                                   self.linear1_bias)
+            act_out = self.activation(linear1_out)
+            dropout1_out = self.dropout1(act_out)
+            linear2_out = F.linear(dropout1_out, self.linear2_weight,
+                                   self.linear2_bias)
+            dropout2_out = residual + self.dropout2(linear2_out)
+            dropout2_out = F.layer_norm(dropout2_out,
+                                        list([self.d_model]), self.ln2_scale,
+                                        self.ln2_bias)
+            paddle.autograd.backward([dropout2_out],
+                                     [paddle.to_tensor(self.dout)], True)
+            return dropout2_out, tensor_src.grad
+
+    def FusedFFN(self):
+        tensor_src = paddle.to_tensor(self.src, stop_gradient=False)
+        out = self.ffn_layer(tensor_src)
+        paddle.autograd.backward([out], [paddle.to_tensor(self.dout)])
+        return out, tensor_src.grad
+
+
 if __name__ == "__main__":
     unittest.main()
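Note: a minimal stand-alone usage sketch of the FusedFeedForward layer exercised by TestFusedFFNOpApi. The import path, device requirement, and concrete shapes are assumptions for illustration; the positional constructor arguments mirror the call in setUp above (d_model, dim_feedforward, dropout rate, activation, activation dropout rate, normalize_before, weight_attr, bias_attr) and may differ across Paddle versions.

# Hypothetical sketch, not part of this diff.
import numpy as np
import paddle
# Assumed module location of the fused layer; adjust to your Paddle build.
from paddle.incubate.nn.layer import fused_transformer

# Assumption: the fused kernels require a CUDA device.
paddle.set_device("gpu")

# d_model=8, dim_feedforward=8, no dropout, relu, post-layer_norm, default attrs,
# matching the values used by the test above.
ffn = fused_transformer.FusedFeedForward(8, 8, 0.0, "relu", 0.0, False, None, None)

# Assumed input shape [batch, seq_len, d_model].
x = paddle.to_tensor(
    np.random.random((1, 8, 8)).astype("float32"), stop_gradient=False)
# linear1 -> activation -> linear2 -> residual add -> layer_norm, done in fused kernels.
out = ffn(x)
paddle.autograd.backward([out], [paddle.ones_like(out)])
print(out.shape, x.grad.shape)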