Merged

98 commits
c4ce9c5
merge 31065
xymyeah Feb 25, 2021
bd13ff2
Fix typo of selected_npus (#31230)
zhiqiu Feb 25, 2021
0592003
merge 31249
xymyeah Feb 26, 2021
5a69ab8
[NPU] Support npu op pow and pow grad (#31247)
liym27 Feb 26, 2021
e8b5505
Fix pow npu fp16 test (#31256)
liym27 Feb 26, 2021
9851897
support list of list attribute for NPU (#31299)
zhiqiu Mar 1, 2021
c5474d7
[NPU] Support npu op: (1) slice (2) slice_grad (#31275)
liym27 Mar 1, 2021
1085db5
fix reading flags from env (#31329)
zhiqiu Mar 1, 2021
1e952c3
merge 31347
zhiqiu Mar 2, 2021
19aca4f
[NPU] Support npu op layer_norm and layer_norm_grad (#31310)
zhiqiu Mar 2, 2021
b8789b0
[NPU] add npu kernel for equal op (#31393)
zhiqiu Mar 4, 2021
c457e6b
[NPU] Support npu kernel for shape op (#31427)
frankwhzhang Mar 4, 2021
832548f
fix endif (#31431)
frankwhzhang Mar 4, 2021
b788ae0
Fix pow, use fillD instead of broadcast (#31433)
zhiqiu Mar 4, 2021
37ed82e
Fix pow, refine code (#31440)
zhiqiu Mar 5, 2021
612a998
fix cmake of cryptopp to avoid downloading every time (#31451)
zhiqiu Mar 8, 2021
792c8e4
[NPU] squeeze and unsqueeze op for ascend (#31452)
YuxiangLu Mar 8, 2021
738cbb9
Support npu kernel for gather op (#31458)
xymyeah Mar 9, 2021
01c7bf5
【NPU】add scale op for npu (#31499)
frankwhzhang Mar 10, 2021
33d16f2
Support TensorFromVector, TensorToVector of bool type (#31518)
zhiqiu Mar 10, 2021
8b47d68
【NPU】support npu kernel for fill_constant op (#31521)
frankwhzhang Mar 10, 2021
efa770c
cherry-pick 31422, solve conflict
xymyeah Mar 10, 2021
06dd58d
【NPU】Support npu kernel for matmul op (#31544)
frankwhzhang Mar 11, 2021
795c645
[NPU] Support npu op elementwise_mul and elementwise_mul_grad (#31571)
oyjxer Mar 12, 2021
4d79141
[NPU] Support npu op elementwise_max (#31574)
oyjxer Mar 12, 2021
ad2c58c
【NPU】add relu op for npu (#31515)
frankwhzhang Mar 12, 2021
9d2c9dc
【NPU】Support npu kernel for reshape2 op (#31524)
frankwhzhang Mar 12, 2021
991c03c
[NPU] Support npu kernel for gather op fix bug (#31541)
xymyeah Mar 12, 2021
4765645
[NPU] Support npu kernel for amp_check_finite_and_unscale_npu op (#31…
xymyeah Mar 12, 2021
c28885e
[NPU] accuracy op (#31492)
yinhaofeng Mar 12, 2021
20a165c
[NPU] add Assign OP (#31561)
OleNet Mar 12, 2021
3738ec7
[NPU] fix npu op elementwise_mul_grad (#31592)
oyjxer Mar 12, 2021
6096f9b
【NPU】Support npu op gelu and gelu_grad (#31530)
oyjxer Mar 12, 2021
a7625c0
[NPU] fix assign cmake (#31595)
zhiqiu Mar 12, 2021
003eb59
fix gather_grad bug (#31607)
zhiqiu Mar 13, 2021
ba085b9
[NPU] add range op (#31560)
OleNet Mar 15, 2021
cbe00cb
【NPU】Support npu op elementwise_div and elementwise_div_grad (#31573)
oyjxer Mar 15, 2021
dc1f62b
[NPU] Support npu op log, log_grad, sqrt, sqrt_grad, square, tanh and…
oyjxer Mar 15, 2021
1ead5a0
[NPU] Support npu op logicalnot_op (#31534)
oyjxer Mar 15, 2021
c4423e6
[NPU] Support npu op elementwise_min (#31575)
oyjxer Mar 15, 2021
099cf9e
[NPU] Support npu op elementwise_pow (#31576)
oyjxer Mar 15, 2021
14d3009
[NPU] Support npu op table_lookup_v2 and table_lookup_v2_grad (#31399)
Meiyim Mar 15, 2021
07f3f5a
[NPU] support npu kernel for `less_than` (#31327)
Meiyim Mar 15, 2021
817a06e
[NPU] Support npu kernel scatter op (#31624)
oyxuan-11 Mar 15, 2021
2d5cd1e
[NPU] fix allocator min chunk size (#31632)
zhiqiu Mar 15, 2021
38419f5
[NPU] Support NPU kernel cast op (#31635)
oyxuan-11 Mar 15, 2021
1f4a044
[NPU] add npu kernel for sgd (#31639)
zhiqiu Mar 15, 2021
14f23be
【NPU】Support NPU kernel for reduce_sum op v2 (#31620)
pangyoki Mar 15, 2021
9ed9b8a
[NPU] add npu kernel for adam (#31644)
zhiqiu Mar 16, 2021
b7ba6a2
【NPU】Support npu kernel for mul op (#31584)
frankwhzhang Mar 16, 2021
cb2d7b2
[NPU] add npu kernel for softmax_with_cross_entropy (#31656)
zhiqiu Mar 16, 2021
a6c6a09
[NPU] add npu kernel for mean Op (#31562)
OleNet Mar 16, 2021
595e605
Revert "[NPU] add npu kernel for mean Op (#31562)" (#31665)
zhiqiu Mar 16, 2021
a0f7d11
【NPU】Add TensorCopy to NPU kernel for reduce_sum op (#31667)
pangyoki Mar 16, 2021
22b95a1
[NPU] Support npu op `expand` (#31405)
Meiyim Mar 16, 2021
ee43974
[NPU] fix shape of dx in mul_grad (#31675)
zhiqiu Mar 16, 2021
8c4b97c
[NPU] add Increment op (#31563)
OleNet Mar 17, 2021
41093f6
[NPU] add NPU add topk (#31596)
OleNet Mar 17, 2021
6d7aa89
[NPU] Support NPU kernel sum op (#31671)
oyxuan-11 Mar 17, 2021
5e8f854
[NPU] npu support `transpose` (#31486)
Meiyim Mar 17, 2021
e8acc60
cherry-pick 31564, solve conflict
OleNet Mar 17, 2021
993235b
[NPU] Fix bug: Fix calculation errors of pow grad npu kernel (#31699)
liym27 Mar 17, 2021
af99906
[NPU] Support testing grad of NPU ops in OpTest (#31697)
liym27 Mar 18, 2021
24ad385
[NPU] Support NPU kernel of stack op (#31711)
oyxuan-11 Mar 18, 2021
fc9554d
[NPU] Remove redundant ctest of top_k_op_npu_test (#31718)
liym27 Mar 18, 2021
07f97a4
[NPU] fix reshape npu op kernel (#31726)
zhiqiu Mar 18, 2021
3dcd6b5
[NPU] change transpose to transpose2 (#31734)
zhiqiu Mar 19, 2021
9d0f0c1
[NPU] Support mean npu kernel (#31729)
OleNet Mar 19, 2021
504163a
[NPU] fix some bugs of npu op (#31739)
zhiqiu Mar 19, 2021
76903ce
【NPU】Fix npu kernel elementwise_div_grad (#31753)
oyjxer Mar 20, 2021
d0d14d0
[NPU] fix the grad kernel diff bug of gather op (#31757)
xymyeah Mar 22, 2021
731d52c
【NPU】Fix reshape test & add grad test (#31776)
frankwhzhang Mar 22, 2021
2e4544d
[NPU] support fp16 for npu accuracy op (#31797)
zhiqiu Mar 23, 2021
db31a33
[NPU] support list of tensor input (#31801)
zhiqiu Mar 23, 2021
b761a75
[NPU] add npu kernel for concat op (#31695)
zhiqiu Mar 23, 2021
9acdf0d
[NPU] Support npu kernel for op elementwise_floordiv (#31822)
liym27 Mar 24, 2021
f674b39
[NPU] fix bug of lookup_table_v2_grad (#31834)
zhiqiu Mar 24, 2021
5bcfa8a
[NPU] support default stream (#31510)
zhiqiu Mar 24, 2021
25aa56b
[NPU] support mixed precision input for npu layer norm (#31847)
pangyoki Mar 24, 2021
7f6b837
【NPU】Support npu kernel for update_loss_scaling op (#31830)
pangyoki Mar 24, 2021
2f8a972
fix compile problem (#31850)
zhiqiu Mar 24, 2021
5f0944d
[NPU] support npu for conditional_block op (#31854)
zhiqiu Mar 25, 2021
661c4b2
【NPU】Add int dtype kernel for reshape2 op (#31864)
frankwhzhang Mar 25, 2021
399959e
[NPU] fix some op bugs (#31855)
zhiqiu Mar 25, 2021
1464a59
[NPU] support fp16 of input for api pow (#31871)
liym27 Mar 25, 2021
479009e
[NPU] add npu kernel for truncated_gaussian_random op (#31654)
zhiqiu Mar 25, 2021
9553a21
fix compile
frankwhzhang Apr 12, 2021
de3d052
fix compile
frankwhzhang Apr 12, 2021
8d94bd7
fix compile
frankwhzhang Apr 12, 2021
4a3cd69
fix compile
frankwhzhang Apr 12, 2021
3e5278a
fix compile
frankwhzhang Apr 12, 2021
2ac9f4b
fix compile
frankwhzhang Apr 12, 2021
c46f44f
fix code style
frankwhzhang Apr 12, 2021
85ae3e2
fix code style
frankwhzhang Apr 12, 2021
a5e35e2
fix code
frankwhzhang Apr 13, 2021
f032717
Fix op test (#32231)
liym27 Apr 13, 2021
ff622d8
fix conditional block (#32243)
zhiqiu Apr 13, 2021
7e9c4ef
fix style code
frankwhzhang Apr 14, 2021
2 changes: 1 addition & 1 deletion cmake/external/gloo.cmake
@@ -32,7 +32,7 @@ cache_third_party(extern_gloo
TAG ${GLOO_TAG}
DIR GLOO_SOURCE_DIR)

if(WITH_ASCEND)
if(WITH_ASCEND OR WITH_ASCEND_CL)
ExternalProject_Add(
extern_gloo
${EXTERNAL_PROJECT_LOG_ARGS}
2 changes: 1 addition & 1 deletion cmake/external/protobuf.cmake
@@ -242,7 +242,7 @@ endif()
)
ENDFUNCTION()

if(WITH_ASCEND)
if(WITH_ASCEND OR WITH_ASCEND_CL)
SET(PROTOBUF_VERSION 3.8.0)
else()
SET(PROTOBUF_VERSION 3.1.0)
2 changes: 1 addition & 1 deletion cmake/external/threadpool.cmake
@@ -16,7 +16,7 @@ INCLUDE(ExternalProject)

SET(THREADPOOL_PREFIX_DIR ${THIRD_PARTY_PATH}/threadpool)
SET(THREADPOOL_SOURCE_DIR ${THIRD_PARTY_PATH}/threadpool/src/extern_threadpool)
if(WITH_ASCEND)
if(WITH_ASCEND OR WITH_ASCEND_CL)
SET(THREADPOOL_REPOSITORY https://gitee.com/tianjianhe/ThreadPool.git)
else()
SET(THREADPOOL_REPOSITORY ${GIT_URL}/progschj/ThreadPool.git)
2 changes: 1 addition & 1 deletion cmake/external/warpctc.cmake
@@ -43,7 +43,7 @@ cache_third_party(extern_warpctc
TAG ${WARPCTC_TAG}
DIR WARPCTC_SOURCE_DIR)

if(WITH_ASCEND)
if(WITH_ASCEND OR WITH_ASCEND_CL)
ExternalProject_Add(
extern_warpctc
${EXTERNAL_PROJECT_LOG_ARGS}
127 changes: 127 additions & 0 deletions paddle/fluid/framework/tensor_util.h
@@ -135,6 +135,7 @@ void TensorFromArray(const T* src, const size_t& array_size,
}
#endif
}

template <typename T>
void TensorFromVector(const std::vector<T>& src,
const platform::DeviceContext& ctx, Tensor* dst) {
@@ -167,6 +168,49 @@ void TensorFromVector(const std::vector<T>& src,
#endif
}

// The fully specialized function should be inline to avoid
// multi-definition.
template <>
inline void TensorFromVector(const std::vector<bool>& src,
const platform::DeviceContext& ctx, Tensor* dst) {
// vector<bool> has no data() member, use array instead.
// See details:
// https://stackoverflow.com/questions/46115669/why-does-stdvectorbool-have-no-data/46115714
bool* array = new bool[src.size()];
for (unsigned int i = 0; i < src.size(); i++) {
array[i] = static_cast<bool>(src[i]);
}

auto dst_place = ctx.GetPlace();
auto src_ptr = static_cast<const void*>(array);
platform::CPUPlace src_place;
dst->Resize({static_cast<int64_t>(src.size())});
auto dst_ptr = static_cast<void*>(dst->mutable_data<bool>(dst_place));
auto size = src.size() * sizeof(bool);

if (platform::is_cpu_place(dst_place)) {
memory::Copy(BOOST_GET_CONST(platform::CPUPlace, dst_place), dst_ptr,
src_place, src_ptr, size);
}
#ifdef PADDLE_WITH_CUDA
else if (platform::is_gpu_place(dst_place)) { // NOLINT
memory::Copy(
BOOST_GET_CONST(platform::CUDAPlace, dst_place), dst_ptr, src_place,
src_ptr, size,
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream());
}
#endif
#ifdef PADDLE_WITH_ASCEND_CL
else if (platform::is_npu_place(dst_place)) { // NOLINT
memory::Copy(
BOOST_GET_CONST(platform::NPUPlace, dst_place), dst_ptr, src_place,
src_ptr, size,
reinterpret_cast<const platform::NPUDeviceContext&>(ctx).stream());
}
#endif
delete[] array;
}

template <typename T>
void TensorFromVector(const std::vector<T>& src, Tensor* dst) {
platform::CPUPlace dst_place = platform::CPUPlace();
@@ -179,6 +223,23 @@ void TensorFromVector(const std::vector<T>& src, Tensor* dst) {
memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
}

template <>
inline void TensorFromVector(const std::vector<bool>& src, Tensor* dst) {
bool* array = new bool[src.size()];
for (unsigned int i = 0; i < src.size(); i++) {
array[i] = static_cast<bool>(src[i]);
}
platform::CPUPlace dst_place = platform::CPUPlace();
auto src_ptr = static_cast<const void*>(array);
platform::CPUPlace src_place;
dst->Resize({static_cast<int64_t>(src.size())});
auto dst_ptr = static_cast<void*>(dst->mutable_data<bool>(dst_place));
auto size = src.size() * sizeof(bool);

memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
delete[] array;
}

template <typename T>
void TensorToVector(const Tensor& src, const platform::DeviceContext& ctx,
std::vector<T>* dst) {
@@ -212,6 +273,46 @@ void TensorToVector(const Tensor& src, const platform::DeviceContext& ctx,
#endif
}

template <>
inline void TensorToVector(const Tensor& src,
const platform::DeviceContext& ctx,
std::vector<bool>* dst) {
auto src_ptr = static_cast<const void*>(src.data<bool>());
auto size = src.numel() * sizeof(bool);

bool* array = new bool[src.numel()];

platform::CPUPlace dst_place;
dst->resize(src.numel());
auto dst_ptr = static_cast<void*>(array);

if (platform::is_cpu_place(src.place())) {
memory::Copy(dst_place, dst_ptr,
BOOST_GET_CONST(platform::CPUPlace, src.place()), src_ptr,
size);
}
#ifdef PADDLE_WITH_CUDA
else if (platform::is_gpu_place(src.place())) { // NOLINT
memory::Copy(
dst_place, dst_ptr, BOOST_GET_CONST(platform::CUDAPlace, src.place()),
src_ptr, size,
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream());
}
#endif
#ifdef PADDLE_WITH_ASCEND_CL
else if (platform::is_npu_place(src.place())) { // NOLINT
memory::Copy(
dst_place, dst_ptr, BOOST_GET_CONST(platform::NPUPlace, src.place()),
src_ptr, size,
reinterpret_cast<const platform::NPUDeviceContext&>(ctx).stream());
}
#endif
for (unsigned int i = 0; i < src.numel(); i++) {
(*dst)[i] = static_cast<bool>(array[i]);
}
delete[] array;
}

template <typename T>
void TensorToVector(const Tensor& src, std::vector<T>* dst) {
auto src_ptr = static_cast<const void*>(src.data<T>());
@@ -231,6 +332,32 @@ void TensorToVector(const Tensor& src, std::vector<T>* dst) {
BOOST_GET_CONST(platform::CPUPlace, src.place()), src_ptr, size);
}

template <>
inline void TensorToVector(const Tensor& src, std::vector<bool>* dst) {
auto src_ptr = static_cast<const void*>(src.data<bool>());
auto size = src.numel() * sizeof(bool);

bool* array = new bool[src.numel()];

platform::CPUPlace dst_place;
dst->resize(src.numel());
auto dst_ptr = static_cast<void*>(array);

PADDLE_ENFORCE_EQ(
platform::is_cpu_place(src.place()), true,
platform::errors::InvalidArgument(
"The input tensor should be CPU device, but actually it is in %s.",
src.place()));

memory::Copy(dst_place, dst_ptr,
BOOST_GET_CONST(platform::CPUPlace, src.place()), src_ptr, size);

for (unsigned int i = 0; i < src.numel(); i++) {
(*dst)[i] = static_cast<bool>(array[i]);
}
delete[] array;
}

std::ostream& operator<<(std::ostream& os, const Tensor& t);
} // namespace framework
} // namespace paddle
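
For orientation, here is a minimal usage sketch of the bool specializations added above; the wrapper function is hypothetical and not part of this PR. Both directions copy through a temporary bool[] because std::vector<bool> has no data() member, exactly as the comments in the header note:

#include <vector>

#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"

// Hypothetical helper, not from the PR: round-trip a std::vector<bool>
// through a CPU Tensor using the new specializations.
void BoolRoundTripSketch() {
  std::vector<bool> src = {true, false, true};

  paddle::framework::Tensor t;
  paddle::framework::TensorFromVector<bool>(src, &t);  // CPU-only overload

  std::vector<bool> dst;
  paddle::framework::TensorToVector<bool>(t, &dst);  // dst now equals src
}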
55 changes: 55 additions & 0 deletions paddle/fluid/framework/tensor_util_test.cc
@@ -242,6 +242,61 @@ TEST(TensorToVector, Tensor) {
#endif
}

TEST(TensorToVector, Tensor_bool) {
{
paddle::framework::Tensor src;
bool* src_ptr =
src.mutable_data<bool>({3, 3}, paddle::platform::CPUPlace());
for (int i = 0; i < 3 * 3; ++i) {
src_ptr[i] = static_cast<bool>(i % 2);
}

paddle::platform::CPUPlace place;
std::vector<bool> dst;
paddle::framework::TensorToVector<bool>(src, &dst);

for (int i = 0; i < 3 * 3; ++i) {
EXPECT_EQ(src_ptr[i], dst[i]);
}
}
#ifdef PADDLE_WITH_CUDA
{
std::vector<bool> src_vec = {
false, true, false, true, false, true, false, true, false,
};
paddle::framework::Tensor gpu_tensor;
paddle::platform::CUDAPlace place;
paddle::platform::CUDADeviceContext gpu_ctx(place);
paddle::framework::TensorFromVector<bool>(src_vec, gpu_ctx, &gpu_tensor);

std::vector<bool> dst;
paddle::framework::TensorToVector<bool>(gpu_tensor, gpu_ctx, &dst);

for (int i = 0; i < 3 * 3; ++i) {
EXPECT_EQ(src_vec[i], dst[i]);
}
}
#endif
#ifdef PADDLE_WITH_ASCEND_CL
{
std::vector<bool> src_vec = {
false, true, false, true, false, true, false, true, false,
};
paddle::framework::Tensor npu_tensor;
paddle::platform::NPUPlace place(0);
paddle::platform::NPUDeviceContext npu_ctx(place);
paddle::framework::TensorFromVector<bool>(src_vec, npu_ctx, &npu_tensor);

std::vector<bool> dst;
paddle::framework::TensorToVector<bool>(npu_tensor, npu_ctx, &dst);

for (int i = 0; i < 3 * 3; ++i) {
EXPECT_EQ(src_vec[i], dst[i]);
}
}
#endif
}

TEST(TensorFromDLPack, Tensor) {
{
std::vector<int> src_vec = {1, 2, 3, 4, 5, 6, 7, 8, 9};
11 changes: 11 additions & 0 deletions paddle/fluid/framework/type_defs.h
@@ -45,6 +45,17 @@ using Attribute = boost::variant<

using AttributeMap = std::unordered_map<std::string, Attribute>;

#ifdef PADDLE_WITH_ASCEND_CL
using NPUAttribute =
boost::variant<boost::blank, int, float, std::string, std::vector<int>,
std::vector<float>, std::vector<std::string>, bool,
std::vector<bool>, BlockDesc*, int64_t,
std::vector<BlockDesc*>, std::vector<int64_t>,
std::vector<double>, std::vector<std::vector<int64_t>>>;

using NPUAttributeMap = std::unordered_map<std::string, NPUAttribute>;
#endif

using OpCreator = std::function<OperatorBase*(
const std::string& /*type*/, const VariableNameMap& /*inputs*/,
const VariableNameMap& /*outputs*/, const AttributeMap& /*attrs*/)>;
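
To illustrate the new variant, a short sketch (the attribute keys are hypothetical, not from the PR): NPUAttribute additionally carries std::vector<std::vector<int64_t>>, which is what the earlier "support list of list attribute for NPU (#31299)" commit relies on:

#include <cstdint>
#include <vector>

#include "paddle/fluid/framework/type_defs.h"

#ifdef PADDLE_WITH_ASCEND_CL
// Hypothetical example (key names are illustrative): NPUAttribute accepts
// list-of-list values such as std::vector<std::vector<int64_t>>.
paddle::framework::NPUAttributeMap MakeExampleNPUAttrs() {
  return {{"axes", std::vector<int64_t>{0, 1}},
          {"offsets", std::vector<std::vector<int64_t>>{{0, 0}, {1, 1}}}};
}
#endif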
16 changes: 16 additions & 0 deletions paddle/fluid/memory/memcpy.cc
@@ -206,8 +206,16 @@ void Copy<platform::NPUPlace, platform::CPUPlace>(platform::NPUPlace dst_place,
if (UNLIKELY(num == 0)) return;

platform::SetNPUDeviceId(dst_place.device);

// NOTE(ascendrc): NPU memcpy async from host to device is a "real" async
// operation, which is different from CUDA. In Paddle, when async is
// requested, "sync" is actually run, which means Paddle does not yet
// fully support async.
// TODO(ascendrc): Support NPU memcpy async for better performance.
stream = nullptr;

VLOG(4) << "memory::Copy " << num << " Bytes from " << src_place << " to "
<< dst_place << " by stream(" << stream << ")";

if (stream) {
platform::RecordEvent record_event("NpuMemcpyAsync:CPU->NPU");
platform::NPUMemcpyAsync(dst, src, num, ACL_MEMCPY_HOST_TO_DEVICE, stream);
@@ -226,8 +234,16 @@ void Copy<platform::CPUPlace, platform::NPUPlace>(platform::CPUPlace dst_place,
if (UNLIKELY(num == 0)) return;

platform::SetNPUDeviceId(src_place.device);

// NOTE(ascendrc): NPU memcpy async from device to host is a "real" async
// operation, which is different from CUDA. In Paddle, when async is
// requested, "sync" is actually run, which means Paddle does not yet
// fully support async.
// TODO(ascendrc): Support NPU memcpy async for better performance.
stream = nullptr;

VLOG(4) << "memory::Copy " << num << " Bytes from " << src_place << " to "
<< dst_place << " by stream(" << stream << ")";

if (stream) {
platform::RecordEvent record_event("NpuMemcpyAsync:NPU->CPU");
platform::NPUMemcpyAsync(dst, src, num, ACL_MEMCPY_DEVICE_TO_HOST, stream);
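
The net effect of the two changes above is that any caller-supplied stream is currently ignored and the copy runs synchronously. A sketch under that assumption (the wrapper function and device pointer are hypothetical; assumes an Ascend build where aclrtStream is available):

#include "acl/acl.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/place.h"

// Hypothetical wrapper, not from the PR: even though a stream is passed in,
// the CPU->NPU Copy specialization resets it to nullptr, so the transfer is
// issued through the synchronous NPUMemcpySync branch.
void HostToNpuCopySketch(void* npu_dst, aclrtStream stream) {
  float host_src[4] = {0.f, 1.f, 2.f, 3.f};
  paddle::platform::NPUPlace npu_place(0);
  paddle::platform::CPUPlace cpu_place;
  paddle::memory::Copy(npu_place, npu_dst, cpu_place, host_src,
                       sizeof(host_src), stream);  // synchronous today
}

If the TODO above lands, the same call could become genuinely asynchronous on the given stream.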
16 changes: 15 additions & 1 deletion paddle/fluid/operators/CMakeLists.txt
@@ -124,6 +124,7 @@ if (WITH_ASCEND)
endif()

if (WITH_ASCEND_CL)
cc_test(assign_op_npu_test SRCS assign_op_npu_test.cc DEPS assign_op)
cc_library(npu_op_runner SRCS npu_op_runner.cc DEPS operator npu_info)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} npu_op_runner)
endif()
@@ -141,8 +142,8 @@ set(OPERATOR_DEPS ${OPERATOR_DEPS} ${COMMON_OP_DEPS})
set(GLOB_OPERATOR_DEPS ${OPERATOR_DEPS} CACHE INTERNAL "Global Op dependencies")

cc_test(test_common_infer_shape_functions SRCS test_common_infer_shape_functions.cc DEPS common_infer_shape_functions ${COMMON_OP_DEPS} activation_op elementwise_add_op softmax_op softmax)
cc_test(assign_op_test SRCS assign_op_test.cc DEPS assign_op)
cc_test(gather_test SRCS gather_test.cc DEPS tensor)
cc_test(assign_op_test SRCS assign_op_test.cc DEPS assign_op)
cc_test(scatter_test SRCS scatter_test.cc DEPS tensor math_function)
cc_test(beam_search_decode_op_test SRCS beam_search_decode_op_test.cc DEPS lod_tensor)
cc_test(strided_memcpy_test SRCS strided_memcpy_test.cc DEPS tensor memory)
@@ -163,10 +164,19 @@ if (WITH_PYTHON)
cc_library(py_func_op SRCS py_func_op.cc DEPS op_registry python pybind)
endif()

if (WITH_ASCEND_CL)
cc_test(range_op_npu_test SRCS range_op_npu_test.cc DEPS op_registry range_op scope device_context enforce executor)
cc_test(lookup_table_v2_op_npu_test SRCS lookup_table_v2_op_npu_test.cc DEPS op_registry lookup_table_v2_op scope device_context enforce executor compare_op)
endif()

set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library")
add_subdirectory(benchmark)

cc_test(op_debug_string_test SRCS op_debug_string_test.cc DEPS elementwise_add_op)
if (WITH_ASCEND_CL)
cc_test(transpose_op_npu_test SRCS transpose_op_npu_test.cc DEPS op_registry transpose_op scope device_context enforce executor)
endif()


if(WITH_MKLDNN)
include(mkldnn/inplace_op_tests.cmake)
@@ -180,3 +190,7 @@ if(WITH_UNITY_BUILD)
# The specified link dependency needs to be displayed here.
target_link_libraries(paddle_operators_unity ${OP_HEADER_DEPS} ${COMMON_OP_DEPS})
endif()

if(WITH_ASCEND_CL)
cc_test(gelu_op_npu_test SRCS gelu_op_npu_test.cc DEPS op_registry gelu_op scope device_context enforce executor)
endif()