Merged
Changes from all commits
235 changes: 2 additions & 233 deletions paddle/fluid/operators/kernel_primitives/functor_primitives.h
@@ -13,241 +13,10 @@
// limitations under the License.

#pragma once

#include "paddle/fluid/operators/amp/fp16_type_traits.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/pten/kernels/funcs/eigen/extensions.h"
#include "paddle/pten/kernels/primitive/functor_primitives.h"
Contributor:
Can this file be deleted?

Contributor (Author):
It will be deleted in a follow-up.

namespace paddle {
namespace operators {
namespace kernel_primitives {
namespace details {

static __device__ __forceinline__ platform::float16 Exp(platform::float16 x) {
return ::Eigen::numext::exp(x);
}

static __device__ __forceinline__ float Exp(float x) { return expf(x); }

static __device__ __forceinline__ double Exp(double x) { return exp(x); }

static __device__ __forceinline__ platform::float16 Log(platform::float16 x) {
return ::Eigen::numext::log(x);
}

static __device__ __forceinline__ float Log(float x) { return logf(x); }

static __device__ __forceinline__ double Log(double x) { return log(x); }

} // namespace details

/******************************** Unary Functor *******************************/

/**
* @brief Default unary exp functor
*/
template <typename Tx, typename Ty = Tx>
struct ExpFunctor {
HOSTDEVICE inline ExpFunctor() {}

HOSTDEVICE explicit inline ExpFunctor(int n) {}

HOSTDEVICE inline Ty operator()(const Tx x) const {
return static_cast<Ty>(details::Exp(x));
}
};

/**
* @brief Default unary identity functor
*/
template <typename Tx, typename Ty = Tx>
struct IdentityFunctor {
HOSTDEVICE inline IdentityFunctor() {}

HOSTDEVICE explicit inline IdentityFunctor(int n) {}

HOSTDEVICE inline Ty operator()(const Tx x) const {
return static_cast<Ty>(x);
}
};

/**
* @brief Default unary div functor. Divide by a constant
*/
template <typename Tx, typename Ty = Tx>
struct DivideFunctor {
private:
using MPType = typename ::paddle::operators::details::MPTypeTrait<Tx>::Type;

public:
HOSTDEVICE inline DivideFunctor() { n_inv = static_cast<MPType>(1.0f); }

HOSTDEVICE explicit inline DivideFunctor(int n) : n_inv((MPType)(1.0 / n)) {}

HOSTDEVICE inline Ty operator()(const Tx x) const {
return static_cast<Ty>(static_cast<MPType>(x) * n_inv);
}

private:
MPType n_inv;
};

/**
* @brief Default inverse functor
*/
template <typename Tx, typename Ty = Tx>
struct InverseFunctor {
HOSTDEVICE inline InverseFunctor() {}

HOSTDEVICE explicit inline InverseFunctor(int n) {}

HOSTDEVICE inline Ty operator()(const Tx x) const {
return static_cast<Ty>(-x);
}
};

/**
* @brief Default unary square functor
*/
template <typename Tx, typename Ty = Tx>
struct SquareFunctor {
HOSTDEVICE inline SquareFunctor() {}

HOSTDEVICE explicit inline SquareFunctor(int n) {}

HOSTDEVICE inline Ty operator()(const Tx x) const {
return static_cast<Ty>(x) * static_cast<Ty>(x);
}
};

/****************************** Binary Functor ********************************/

/**
* @brief Default binary min functor
*/
template <typename T>
struct MinFunctor {
inline T initial() { return static_cast<T>(std::numeric_limits<T>::max()); }

__device__ __forceinline__ T operator()(const T a, const T b) const {
return (b < a) ? b : a;
}
};

/**
* @brief Default binary max functor
*/
template <typename T>
struct MaxFunctor {
inline T initial() {
return static_cast<T>(std::numeric_limits<T>::lowest());
}

__device__ __forceinline__ T operator()(const T a, const T b) const {
return (b > a) ? b : a;
}
};

/**
* @brief Default binary add functor
*/
template <typename T>
struct AddFunctor {
inline T initial() { return static_cast<T>(0.0f); }

__device__ __forceinline__ T operator()(const T a, const T b) const {
return b + a;
}
};

/**
* @brief Default binary mul functor
*/
template <typename T>
struct MulFunctor {
inline T initial() { return static_cast<T>(1.0f); }

__device__ __forceinline__ T operator()(const T a, const T b) const {
return b * a;
}
};

/**
* @brief Default binary logic or functor
*/
template <typename T>
struct LogicalOrFunctor {
inline T initial() { return static_cast<T>(false); }

__device__ __forceinline__ T operator()(const T a, const T b) const {
return b || a;
}
};

/**
* @brief Default binary logic and functor
*/
template <typename T>
struct LogicalAndFunctor {
inline T initial() { return static_cast<T>(true); }

__device__ __forceinline__ T operator()(const T a, const T b) const {
return b && a;
}
};

/**
* @brief Default binary sub functor
*/
template <typename T>
struct SubFunctor {
inline T initial() { return static_cast<T>(0.0f); }

inline HOSTDEVICE T operator()(const T a, const T b) const { return a - b; }
};

/**
* @brief Default binary div functor
*/
template <typename T, typename Enable = void>
struct DivFunctor {
inline T initial() { return static_cast<T>(1.0f); }

inline HOSTDEVICE T operator()(const T a, const T b) const { return a / b; }
};

template <typename T>
struct DivFunctor<T,
typename std::enable_if<std::is_integral<T>::value>::type> {
inline T initial() { return static_cast<T>(1.0f); }

inline HOSTDEVICE T operator()(const T a, const T b) const {
// For int32/int64, need to check whether the divisor is zero.
PADDLE_ENFORCE_NE(b, 0,
platform::errors::InvalidArgument(
"Integer division by zero encountered "
"in (floor) divide. Please check the input value."));
return a / b;
}
};

/**
* @brief Default binary floor divide functor
*/
template <typename T>
struct FloorDivFunctor {
inline T initial() { return static_cast<T>(1.0f); }

inline HOSTDEVICE T operator()(const T a, const T b) const {
PADDLE_ENFORCE_NE(b, 0,
platform::errors::InvalidArgument(
"Integer division by zero encountered "
"in (floor) divide. Please check the input value."));
return static_cast<T>(std::trunc(a / b));
}
};

} // namespace kernel_primitives
namespace kernel_primitives = pten::kps;
Contributor:
Hope this file can be deleted as soon as possible. It is only used in paddle/fluid/operators/kernel_primitives/kernel_primitives.h, so it is enough to just update the header include in paddle/fluid/operators/kernel_primitives/kernel_primitives.h.

} // namespace operators
} // namespace paddle
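The functors deleted above all share one contract, which the pten::kps replacements are expected to keep: initial() returns the identity element of the reduction (0 for add, lowest() for max, true for logical-and, and so on) and operator() merges two partial values. A minimal host-side sketch of that contract follows; the Reduce driver and the sample data are illustrative assumptions, not code from this PR.

#include <iostream>
#include <limits>
#include <vector>

// Same shape as the deleted AddFunctor / MaxFunctor: initial() supplies the
// identity element, operator() combines two partial results.
template <typename T>
struct AddFunctor {
  T initial() const { return static_cast<T>(0); }
  T operator()(const T a, const T b) const { return b + a; }
};

template <typename T>
struct MaxFunctor {
  T initial() const { return std::numeric_limits<T>::lowest(); }
  T operator()(const T a, const T b) const { return (b > a) ? b : a; }
};

// Hypothetical serial driver; the real kernels apply the same functor across
// GPU threads and blocks, then combine the per-block partials.
template <typename T, typename Reducer>
T Reduce(const std::vector<T>& data, Reducer reducer) {
  T acc = reducer.initial();
  for (const T& v : data) acc = reducer(acc, v);
  return acc;
}

int main() {
  std::vector<float> xs = {1.f, 2.f, 3.f};
  std::cout << Reduce(xs, AddFunctor<float>()) << std::endl;  // 6
  std::cout << Reduce(xs, MaxFunctor<float>()) << std::endl;  // 3
  return 0;
}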
55 changes: 2 additions & 53 deletions paddle/fluid/operators/kernel_primitives/kernel_primitives.h
@@ -13,61 +13,10 @@
// limitations under the License.

#pragma once
#include "paddle/fluid/operators/kernel_primitives/helper_primitives.h"
#ifdef PADDLE_WITH_XPU2
#include "paddle/fluid/operators/kernel_primitives/compute_primitives_xpu2.h"
#include "paddle/fluid/operators/kernel_primitives/datamover_primitives_xpu2.h"
#include "paddle/fluid/operators/kernel_primitives/functor_primitives_xpu2.h"

#define KPStream XPUStream
#define KPDevice paddle::platform::XPUDeviceContext
#define _ptr_ _global_ptr_
#define __forceinline__ __inline__
#define __restrict__

#define THREAD_ID_X core_id()
#define THREAD_ID_Y 0
#define THREAD_ID_Z 0

#define BLOCK_NUM_X core_num()
#define BLOCK_NUM_Y 0
#define BLOCK_NUM_Z 0

#define BLOCK_ID_X cluster_id()
#define BLOCK_ID_Y 0
#define BLOCK_ID_Z 0

#define GRID_NUM_X cluster_num()
#define GRID_NUM_Y 0
#define GRID_NUM_Z 0
#else
#include "paddle/fluid/operators/kernel_primitives/compute_primitives.h"
#include "paddle/fluid/operators/kernel_primitives/datamover_primitives.h"
#include "paddle/fluid/operators/kernel_primitives/functor_primitives.h"

#define KPStream gpuStream_t
#define KPDevice paddle::platform::CUDADeviceContext
#define _ptr_

#define THREAD_ID_X threadIdx.x
#define THREAD_ID_Y threadIdx.y
#define THREAD_ID_Z threadIdx.z

#define BLOCK_NUM_X blockDim.x
#define BLOCK_NUM_Y blockDim.y
#define BLOCK_NUM_Z blockDim.z

#define BLOCK_ID_X blockIdx.x
#define BLOCK_ID_Y blockIdx.y
#define BLOCK_ID_Z blockIdx.z

#define GRID_NUM_X gridDim.x
#define GRID_NUM_Y gridDim.y
#define GRID_NUM_Z gridDim.z
#endif
#include "paddle/pten/kernels/primitive/kernel_primitives.h"

namespace paddle {
namespace operators {
namespace kernel_primitives {}
namespace kernel_primitives = pten::kps;
}
}
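The larger block deleted above is the backend-indirection layer that now lives in paddle/pten/kernels/primitive/kernel_primitives.h: device code is written against neutral names such as THREAD_ID_X and BLOCK_NUM_X, and the macros map them to threadIdx.x / blockDim.x on CUDA or core_id() / core_num() on XPU2. A simplified sketch of how a kernel uses that indirection follows; the ScaleKernel function is an illustrative assumption, not code from this PR.

// Backend selection as in the deleted block (trimmed to two of the macros).
#ifdef PADDLE_WITH_XPU2
#define THREAD_ID_X core_id()
#define BLOCK_NUM_X core_num()
#else
#define THREAD_ID_X threadIdx.x
#define BLOCK_NUM_X blockDim.x
#endif

// Written once against the neutral macros; the preprocessor specializes the
// same source for the GPU or XPU2 backend at compile time.
template <typename T>
__device__ void ScaleKernel(const T* in, T* out, int n, T scale) {
  for (int i = THREAD_ID_X; i < n; i += BLOCK_NUM_X) {
    out[i] = in[i] * scale;
  }
}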
4 changes: 2 additions & 2 deletions paddle/pten/kernels/funcs/elementwise_base.h
@@ -22,12 +22,12 @@ limitations under the License. */
#include "paddle/pten/kernels/empty_kernel.h"

#if defined(__NVCC__) || defined(__HIPCC__)
#include "paddle/fluid/operators/kernel_primitives/kernel_primitives.h"
#include "paddle/fluid/platform/aligned_vector.h"
#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
#include "paddle/fluid/platform/function_traits.h"
#include "paddle/pten/kernels/primitive/kernel_primitives.h"

namespace kps = paddle::operators::kernel_primitives;
namespace kps = pten::kps;

#endif

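Here only the alias target changes: downstream code keeps writing kps::, but kps now points at pten::kps instead of paddle::operators::kernel_primitives, while the old namespace itself (see the alias added to kernel_primitives.h above) forwards to the same place. A small standalone example of why a namespace alias preserves source compatibility follows; the pten_demo / paddle_demo names are hypothetical stand-ins.

#include <iostream>

namespace pten_demo {
namespace kps {
// Stand-in for a functor that moved to the new namespace.
template <typename T>
struct IdentityFunctor {
  T operator()(const T x) const { return x; }
};
}  // namespace kps
}  // namespace pten_demo

namespace paddle_demo {
namespace operators {
// The legacy namespace becomes an alias, so fully qualified legacy spellings
// still resolve to the relocated implementation.
namespace kernel_primitives = pten_demo::kps;
}  // namespace operators
}  // namespace paddle_demo

// New short spelling, as used by elementwise_base.h and reduce.h.
namespace kps = pten_demo::kps;

int main() {
  paddle_demo::operators::kernel_primitives::IdentityFunctor<int> legacy;
  kps::IdentityFunctor<int> current;
  std::cout << legacy(42) << " " << current(7) << std::endl;
  return 0;
}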
4 changes: 2 additions & 2 deletions paddle/pten/kernels/gpu/reduce.h
@@ -34,13 +34,13 @@ namespace cub = hipcub;

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/amp/fp16_type_traits.h"
#include "paddle/fluid/operators/kernel_primitives/kernel_primitives.h"
#include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/fluid/platform/fast_divmod.h"
#include "paddle/fluid/string/string_helper.h"
#include "paddle/pten/core/array.h"
#include "paddle/pten/core/enforce.h"
#include "paddle/pten/kernels/primitive/kernel_primitives.h"

#include "paddle/pten/api/ext/dispatch.h"
#include "paddle/pten/backends/gpu/gpu_context.h"
@@ -51,7 +51,7 @@ namespace cub = hipcub;
#define REDUCE_SPLIT_BOUNDARY 512
#define REDUCE_VEC_SIZE 4

namespace kps = paddle::operators::kernel_primitives;
namespace kps = pten::kps;

namespace pten {
namespace kernels {