Merged
Changes from all commits
235 changes: 2 additions & 233 deletions paddle/fluid/operators/kernel_primitives/functor_primitives.h
@@ -13,241 +13,10 @@
// limitations under the License.

#pragma once

#include "paddle/fluid/operators/amp/fp16_type_traits.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/pten/kernels/funcs/eigen/extensions.h"
#include "paddle/pten/kernels/primitive/functor_primitives.h"
Contributor:
Can this file be deleted?

Contributor (Author):
It will be deleted in a follow-up.

namespace paddle {
namespace operators {
namespace kernel_primitives {
namespace details {

static __device__ __forceinline__ platform::float16 Exp(platform::float16 x) {
return ::Eigen::numext::exp(x);
}

static __device__ __forceinline__ float Exp(float x) { return expf(x); }

static __device__ __forceinline__ double Exp(double x) { return exp(x); }

static __device__ __forceinline__ platform::float16 Log(platform::float16 x) {
return ::Eigen::numext::log(x);
}

static __device__ __forceinline__ float Log(float x) { return logf(x); }

static __device__ __forceinline__ double Log(double x) { return log(x); }

} // namespace details

/******************************** Unary Functor *******************************/

/**
* @brief Default unary exp functor
*/
template <typename Tx, typename Ty = Tx>
struct ExpFunctor {
HOSTDEVICE inline ExpFunctor() {}

HOSTDEVICE explicit inline ExpFunctor(int n) {}

HOSTDEVICE inline Ty operator()(const Tx x) const {
return static_cast<Ty>(details::Exp(x));
}
};

/**
* @brief Default unary identity functor
*/
template <typename Tx, typename Ty = Tx>
struct IdentityFunctor {
HOSTDEVICE inline IdentityFunctor() {}

HOSTDEVICE explicit inline IdentityFunctor(int n) {}

HOSTDEVICE inline Ty operator()(const Tx x) const {
return static_cast<Ty>(x);
}
};

/**
* @brief Default unary div functor. Divide by a constant
*/
template <typename Tx, typename Ty = Tx>
struct DivideFunctor {
private:
using MPType = typename ::paddle::operators::details::MPTypeTrait<Tx>::Type;

public:
HOSTDEVICE inline DivideFunctor() { n_inv = static_cast<MPType>(1.0f); }

HOSTDEVICE explicit inline DivideFunctor(int n) : n_inv((MPType)(1.0 / n)) {}

HOSTDEVICE inline Ty operator()(const Tx x) const {
return static_cast<Ty>(static_cast<MPType>(x) * n_inv);
}

private:
MPType n_inv;
};

/**
* @brief Default inverse functor
*/
template <typename Tx, typename Ty = Tx>
struct InverseFunctor {
HOSTDEVICE inline InverseFunctor() {}

HOSTDEVICE explicit inline InverseFunctor(int n) {}

HOSTDEVICE inline Ty operator()(const Tx x) const {
return static_cast<Ty>(-x);
}
};

/**
* @brief Default unary square functor
*/
template <typename Tx, typename Ty = Tx>
struct SquareFunctor {
HOSTDEVICE inline SquareFunctor() {}

HOSTDEVICE explicit inline SquareFunctor(int n) {}

HOSTDEVICE inline Ty operator()(const Tx x) const {
return static_cast<Ty>(x) * static_cast<Ty>(x);
}
};

/****************************** Binary Functor ********************************/

/**
* @brief Default binary min functor
*/
template <typename T>
struct MinFunctor {
inline T initial() { return static_cast<T>(std::numeric_limits<T>::max()); }

__device__ __forceinline__ T operator()(const T a, const T b) const {
return (b < a) ? b : a;
}
};

/**
* @brief Default binary max functor
*/
template <typename T>
struct MaxFunctor {
inline T initial() {
return static_cast<T>(std::numeric_limits<T>::lowest());
}

__device__ __forceinline__ T operator()(const T a, const T b) const {
return (b > a) ? b : a;
}
};

/**
* @brief Default binary add functor
*/
template <typename T>
struct AddFunctor {
inline T initial() { return static_cast<T>(0.0f); }

__device__ __forceinline__ T operator()(const T a, const T b) const {
return b + a;
}
};

/**
* @brief Default binary mul functor
*/
template <typename T>
struct MulFunctor {
inline T initial() { return static_cast<T>(1.0f); }

__device__ __forceinline__ T operator()(const T a, const T b) const {
return b * a;
}
};

/**
* @brief Default binary logic or functor
*/
template <typename T>
struct LogicalOrFunctor {
inline T initial() { return static_cast<T>(false); }

__device__ __forceinline__ T operator()(const T a, const T b) const {
return b || a;
}
};

/**
* @brief Default binary logic and functor
*/
template <typename T>
struct LogicalAndFunctor {
inline T initial() { return static_cast<T>(true); }

__device__ __forceinline__ T operator()(const T a, const T b) const {
return b && a;
}
};

/**
* @brief Default binary sub functor
*/
template <typename T>
struct SubFunctor {
inline T initial() { return static_cast<T>(0.0f); }

inline HOSTDEVICE T operator()(const T a, const T b) const { return a - b; }
};

/**
* @brief Default binary div functor
*/
template <typename T, typename Enable = void>
struct DivFunctor {
inline T initial() { return static_cast<T>(1.0f); }

inline HOSTDEVICE T operator()(const T a, const T b) const { return a / b; }
};

template <typename T>
struct DivFunctor<T,
typename std::enable_if<std::is_integral<T>::value>::type> {
inline T initial() { return static_cast<T>(1.0f); }

inline HOSTDEVICE T operator()(const T a, const T b) const {
// For int32/int64, need to check whether the divisor is zero.
PADDLE_ENFORCE_NE(b, 0,
platform::errors::InvalidArgument(
"Integer division by zero encountered "
"in (floor) divide. Please check the input value."));
return a / b;
}
};

/**
* @brief Default binary floor divide functor
*/
template <typename T>
struct FloorDivFunctor {
inline T initial() { return static_cast<T>(1.0f); }

inline HOSTDEVICE T operator()(const T a, const T b) const {
PADDLE_ENFORCE_NE(b, 0,
platform::errors::InvalidArgument(
"Integer division by zero encountered "
"in (floor) divide. Please check the input value."));
return static_cast<T>(std::trunc(a / b));
}
};

} // namespace kernel_primitives
namespace kernel_primitives = pten::kps;
Contributor:
Hope this file can be deleted as soon as possible. It is only used in paddle/fluid/operators/kernel_primitives/kernel_primitives.h, so it is enough to just update the header include in paddle/fluid/operators/kernel_primitives/kernel_primitives.h.

} // namespace operators
} // namespace paddle
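The functors deleted above all share one contract, which the pten::kps replacements are expected to keep: initial() returns the identity element of the reduction (0 for add, lowest() for max, true for logical-and, and so on) and operator() merges two partial values. A minimal host-side sketch of that contract follows; the Reduce driver and the sample data are illustrative assumptions, not code from this PR.

#include <iostream>
#include <limits>
#include <vector>

// Same shape as the deleted AddFunctor / MaxFunctor: initial() supplies the
// identity element, operator() combines two partial results.
template <typename T>
struct AddFunctor {
  T initial() const { return static_cast<T>(0); }
  T operator()(const T a, const T b) const { return b + a; }
};

template <typename T>
struct MaxFunctor {
  T initial() const { return std::numeric_limits<T>::lowest(); }
  T operator()(const T a, const T b) const { return (b > a) ? b : a; }
};

// Hypothetical serial driver; the real kernels apply the same functor across
// GPU threads and blocks, then combine the per-block partials.
template <typename T, typename Reducer>
T Reduce(const std::vector<T>& data, Reducer reducer) {
  T acc = reducer.initial();
  for (const T& v : data) acc = reducer(acc, v);
  return acc;
}

int main() {
  std::vector<float> xs = {1.f, 2.f, 3.f};
  std::cout << Reduce(xs, AddFunctor<float>()) << std::endl;  // 6
  std::cout << Reduce(xs, MaxFunctor<float>()) << std::endl;  // 3
  return 0;
}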
55 changes: 2 additions & 53 deletions paddle/fluid/operators/kernel_primitives/kernel_primitives.h
@@ -13,61 +13,10 @@
// limitations under the License.

#pragma once
#include "paddle/fluid/operators/kernel_primitives/helper_primitives.h"
#ifdef PADDLE_WITH_XPU2
#include "paddle/fluid/operators/kernel_primitives/compute_primitives_xpu2.h"
#include "paddle/fluid/operators/kernel_primitives/datamover_primitives_xpu2.h"
#include "paddle/fluid/operators/kernel_primitives/functor_primitives_xpu2.h"

#define KPStream XPUStream
#define KPDevice paddle::platform::XPUDeviceContext
#define _ptr_ _global_ptr_
#define __forceinline__ __inline__
#define __restrict__

#define THREAD_ID_X core_id()
#define THREAD_ID_Y 0
#define THREAD_ID_Z 0

#define BLOCK_NUM_X core_num()
#define BLOCK_NUM_Y 0
#define BLOCK_NUM_Z 0

#define BLOCK_ID_X cluster_id()
#define BLOCK_ID_Y 0
#define BLOCK_ID_Z 0

#define GRID_NUM_X cluster_num()
#define GRID_NUM_Y 0
#define GRID_NUM_Z 0
#else
#include "paddle/fluid/operators/kernel_primitives/compute_primitives.h"
#include "paddle/fluid/operators/kernel_primitives/datamover_primitives.h"
#include "paddle/fluid/operators/kernel_primitives/functor_primitives.h"

#define KPStream gpuStream_t
#define KPDevice paddle::platform::CUDADeviceContext
#define _ptr_

#define THREAD_ID_X threadIdx.x
#define THREAD_ID_Y threadIdx.y
#define THREAD_ID_Z threadIdx.z

#define BLOCK_NUM_X blockDim.x
#define BLOCK_NUM_Y blockDim.y
#define BLOCK_NUM_Z blockDim.z

#define BLOCK_ID_X blockIdx.x
#define BLOCK_ID_Y blockIdx.y
#define BLOCK_ID_Z blockIdx.z

#define GRID_NUM_X gridDim.x
#define GRID_NUM_Y gridDim.y
#define GRID_NUM_Z gridDim.z
#endif
#include "paddle/pten/kernels/primitive/kernel_primitives.h"

namespace paddle {
namespace operators {
namespace kernel_primitives {}
namespace kernel_primitives = pten::kps;
}
}
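The larger block deleted above is the backend-indirection layer that now lives in paddle/pten/kernels/primitive/kernel_primitives.h: device code is written against neutral names such as THREAD_ID_X and BLOCK_NUM_X, and the macros map them to threadIdx.x / blockDim.x on CUDA or core_id() / core_num() on XPU2. A simplified sketch of how a kernel uses that indirection follows; the ScaleKernel function is an illustrative assumption, not code from this PR.

// Backend selection as in the deleted block (trimmed to two of the macros).
#ifdef PADDLE_WITH_XPU2
#define THREAD_ID_X core_id()
#define BLOCK_NUM_X core_num()
#else
#define THREAD_ID_X threadIdx.x
#define BLOCK_NUM_X blockDim.x
#endif

// Written once against the neutral macros; the preprocessor specializes the
// same source for the GPU or XPU2 backend at compile time.
template <typename T>
__device__ void ScaleKernel(const T* in, T* out, int n, T scale) {
  for (int i = THREAD_ID_X; i < n; i += BLOCK_NUM_X) {
    out[i] = in[i] * scale;
  }
}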
4 changes: 2 additions & 2 deletions paddle/pten/kernels/funcs/elementwise_base.h
@@ -22,12 +22,12 @@ limitations under the License. */
#include "paddle/pten/kernels/empty_kernel.h"

#if defined(__NVCC__) || defined(__HIPCC__)
#include "paddle/fluid/operators/kernel_primitives/kernel_primitives.h"
#include "paddle/fluid/platform/aligned_vector.h"
#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
#include "paddle/fluid/platform/function_traits.h"
#include "paddle/pten/kernels/primitive/kernel_primitives.h"

namespace kps = paddle::operators::kernel_primitives;
namespace kps = pten::kps;

#endif

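Here only the alias target changes: downstream code keeps writing kps::, but kps now points at pten::kps instead of paddle::operators::kernel_primitives, while the old namespace itself (see the alias added to kernel_primitives.h above) forwards to the same place. A small standalone example of why a namespace alias preserves source compatibility follows; the pten_demo / paddle_demo names are hypothetical stand-ins.

#include <iostream>

namespace pten_demo {
namespace kps {
// Stand-in for a functor that moved to the new namespace.
template <typename T>
struct IdentityFunctor {
  T operator()(const T x) const { return x; }
};
}  // namespace kps
}  // namespace pten_demo

namespace paddle_demo {
namespace operators {
// The legacy namespace becomes an alias, so fully qualified legacy spellings
// still resolve to the relocated implementation.
namespace kernel_primitives = pten_demo::kps;
}  // namespace operators
}  // namespace paddle_demo

// New short spelling, as used by elementwise_base.h and reduce.h.
namespace kps = pten_demo::kps;

int main() {
  paddle_demo::operators::kernel_primitives::IdentityFunctor<int> legacy;
  kps::IdentityFunctor<int> current;
  std::cout << legacy(42) << " " << current(7) << std::endl;
  return 0;
}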
4 changes: 2 additions & 2 deletions paddle/pten/kernels/gpu/reduce.h
@@ -34,13 +34,13 @@ namespace cub = hipcub;

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/amp/fp16_type_traits.h"
#include "paddle/fluid/operators/kernel_primitives/kernel_primitives.h"
#include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/fluid/platform/fast_divmod.h"
#include "paddle/fluid/string/string_helper.h"
#include "paddle/pten/core/array.h"
#include "paddle/pten/core/enforce.h"
#include "paddle/pten/kernels/primitive/kernel_primitives.h"

#include "paddle/pten/api/ext/dispatch.h"
#include "paddle/pten/backends/gpu/gpu_context.h"
@@ -51,7 +51,7 @@ namespace cub = hipcub;
#define REDUCE_SPLIT_BOUNDARY 512
#define REDUCE_VEC_SIZE 4

namespace kps = paddle::operators::kernel_primitives;
namespace kps = pten::kps;

namespace pten {
namespace kernels {