Skip to content

Commit 89e4127

Browse files
committed
remove all useless templates
1 parent a791464 commit 89e4127

File tree

6 files changed

+21
-23
lines changed

6 files changed

+21
-23
lines changed

paddle/fluid/operators/kron_op.h

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,6 @@ struct KronGradElemFunctor<platform::complex<T>> {
237237
const int ndims_;
238238
};
239239

240-
template <typename T>
241240
struct IdentityFunctor {
242241
HOSTDEVICE explicit inline IdentityFunctor() {}
243242

@@ -315,13 +314,13 @@ struct KronGradOpFunctor {
315314
#if defined(__NVCC__) || defined(__HIPCC__)
316315
auto stream = dev_ctx.stream(); // it is a cuda device_context
317316
if (dx) {
318-
TensorReduce<T, T, cub::Sum, IdentityFunctor<T>>(
319-
dout_x, dx, {1}, static_cast<T>(0), cub::Sum(), IdentityFunctor<T>(),
317+
TensorReduce<T, T, cub::Sum, IdentityFunctor>(
318+
dout_x, dx, {1}, static_cast<T>(0), cub::Sum(), IdentityFunctor(),
320319
stream);
321320
}
322321
if (dy) {
323-
TensorReduce<T, T, cub::Sum, IdentityFunctor<T>>(
324-
dout_y, dy, {1}, static_cast<T>(0), cub::Sum(), IdentityFunctor<T>(),
322+
TensorReduce<T, T, cub::Sum, IdentityFunctor>(
323+
dout_y, dy, {1}, static_cast<T>(0), cub::Sum(), IdentityFunctor(),
325324
stream);
326325
}
327326
#else

paddle/fluid/operators/matmul_v2_op.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ namespace operators {
3434

3535
using framework::Tensor;
3636

37-
template <typename T>
3837
struct IdentityFunctor {
3938
HOSTDEVICE explicit inline IdentityFunctor() {}
4039

@@ -50,9 +49,9 @@ void ReduceSumForMatmulGrad(const Tensor* input, Tensor* output,
5049
const paddle::framework::ExecutionContext& ctx) {
5150
#if defined(__NVCC__) || defined(__HIPCC__)
5251
auto stream = ctx.cuda_device_context().stream();
53-
TensorReduce<T, T, cub::Sum, IdentityFunctor<T>>(
54-
*input, output, reduce_dims, static_cast<T>(0), cub::Sum(),
55-
IdentityFunctor<T>(), stream);
52+
TensorReduce<T, T, cub::Sum, IdentityFunctor>(*input, output, reduce_dims,
53+
static_cast<T>(0), cub::Sum(),
54+
IdentityFunctor(), stream);
5655
#else
5756
ReduceKernelFunctor<DeviceContext, T, ops::SumFunctor>(
5857
input, output, reduce_dims, true, false, ctx)

paddle/fluid/operators/reduce_ops/cub_reduce.h

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -436,8 +436,7 @@ void TensorReduce(const framework::Tensor& x, framework::Tensor* y,
436436
#undef CUB_BLOCK_DIM_CASE
437437
}
438438

439-
template <typename Tx, typename ReduceOp,
440-
template <typename, typename> class TransformOp>
439+
template <typename Tx, typename ReduceOp, template <typename> class TransformOp>
441440
struct TensorReduceFunctor {
442441
const framework::Tensor& x;
443442
framework::Tensor* y;
@@ -459,9 +458,9 @@ struct TensorReduceFunctor {
459458

460459
void apply() const {
461460
const Ty& init_cast = static_cast<Ty>(init);
462-
TensorReduce<Tx, Ty, ReduceOp, TransformOp<Tx, Ty>>(
463-
x, y, origin_reduce_dims, init_cast, reducer, TransformOp<Tx, Ty>(),
464-
stream);
461+
TensorReduce<Tx, Ty, ReduceOp, TransformOp<Ty>>(x, y, origin_reduce_dims,
462+
init_cast, reducer,
463+
TransformOp<Ty>(), stream);
465464
}
466465
};
467466

paddle/fluid/operators/reduce_ops/reduce_sum_op.cu

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,13 @@
1818
namespace paddle {
1919
namespace operators {
2020

21-
template <typename Tx, typename Ty = Tx>
21+
template <typename Tout>
2222
struct IdentityFunctor {
2323
HOSTDEVICE explicit inline IdentityFunctor() {}
2424

2525
template <typename U>
26-
HOSTDEVICE inline Ty operator()(const U& x) const {
27-
return static_cast<Ty>(x);
26+
HOSTDEVICE inline Tout operator()(const U& x) const {
27+
return static_cast<Tout>(x);
2828
}
2929
};
3030

@@ -63,9 +63,9 @@ class ReduceSumKernel : public framework::OpKernel<T> {
6363
*input, output, reduce_dims, static_cast<double>(0.0), cub::Sum(),
6464
stream));
6565
} else {
66-
TensorReduce<T, T, cub::Sum, IdentityFunctor<T, T>>(
66+
TensorReduce<T, T, cub::Sum, IdentityFunctor<T>>(
6767
*input, output, reduce_dims, static_cast<T>(0), cub::Sum(),
68-
IdentityFunctor<T, T>(), stream);
68+
IdentityFunctor<T>(), stream);
6969
}
7070
}
7171
};

paddle/fluid/operators/trace_op.cu

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
namespace paddle {
2121
namespace operators {
2222

23-
template <typename T>
2423
struct IdentityFunctor {
2524
HOSTDEVICE explicit inline IdentityFunctor() {}
2625

@@ -48,9 +47,9 @@ class TraceCUDAKernel : public framework::OpKernel<T> {
4847
auto stream = context.cuda_device_context().stream();
4948
std::vector<int> reduce_dims;
5049
reduce_dims.push_back(out->dims().size());
51-
TensorReduce<T, T, cub::Sum, IdentityFunctor<T>>(
50+
TensorReduce<T, T, cub::Sum, IdentityFunctor>(
5251
diag, out, reduce_dims, static_cast<T>(0), cub::Sum(),
53-
IdentityFunctor<T>(), stream);
52+
IdentityFunctor(), stream);
5453
}
5554
}
5655
};

python/paddle/fluid/tests/unittests/test_reduce_op.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def setUp(self):
5050
self.gradient = self.calc_gradient()
5151

5252
def test_check_output(self):
53-
self.check_output(atol=1e-2)
53+
self.check_output()
5454

5555
def calc_gradient(self):
5656
x = self.inputs["X"]
@@ -65,6 +65,8 @@ class TestSumOp_fp16_withInt(OpTest):
6565
def setUp(self):
6666
self.op_type = "reduce_sum"
6767
self.inputs = {
68+
# ref to https://en.wikipedia.org/wiki/Half-precision_floating-point_format
69+
# Precision limitation: integer values between 0 and 2048 can be exactly represented in float16
6870
'X': np.random.randint(0, 30, (10, 10)).astype("float16")
6971
}
7072
self.attrs = {'dim': [0, 1]}

0 commit comments

Comments (0)