Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions cmake/anakin_subgraph.cmake
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
if(NOT WITH_GPU)
return()
endif()

set(ANAKIN_ROOT "/usr" CACHE PATH "ANAKIN ROOT")
find_path(ANAKIN_INCLUDE_DIR anakin_config.h
PATHS ${ANAKIN_ROOT} ${ANAKIN_ROOT}/include
Expand All @@ -16,9 +12,7 @@ find_library(ANAKIN_LIBRARY NAMES libanakin_saber_common.so libanakin.so
DOC "Path to ANAKIN library.")

if(ANAKIN_INCLUDE_DIR AND ANAKIN_LIBRARY)
if(WITH_DSO)
set(ANAKIN_FOUND ON)
endif(WITH_DSO)
else()
set(ANAKIN_FOUND OFF)
endif()
Expand All @@ -31,3 +25,8 @@ if(ANAKIN_FOUND)
link_directories(${ANAKIN_ROOT})
add_definitions(-DPADDLE_WITH_ANAKIN)
endif()

if(ANAKIN_FOUND AND WITH_GPU AND WITH_DSO)
message(STATUS "Compile with anakin subgraph.")
set(ANAKIN_SUBGRAPH ON)
endif()
2 changes: 1 addition & 1 deletion paddle/fluid/framework/ir/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ pass_library(fillconstant_elementwisemul_fuse inference)
pass_library(shuffle_channel_detect_pass inference)
pass_library(delete_quant_dequant_op_pass inference)

if(ANAKIN_FOUND)
if(ANAKIN_SUBGRAPH)
pass_library(simplify_anakin_priorbox_detection_out_pass inference)
endif()

Expand Down
8 changes: 6 additions & 2 deletions paddle/fluid/inference/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ if (TENSORRT_FOUND)
add_subdirectory(tensorrt)
endif()

if (ANAKIN_FOUND)
if (ANAKIN_SUBGRAPH)
add_subdirectory(anakin)
endif()

Expand All @@ -43,11 +43,15 @@ if(WITH_MKLDNN)
endif()

set(STATIC_INFERENCE_APIS paddle_fluid_api paddle_inference_api analysis_predictor)
if (ANAKIN_FOUND)
set(ANAKIN_SHARED_INFERENCE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/api/api_anakin_engine.cc)
endif()
set(SHARED_INFERENCE_SRCS
io.cc ${CMAKE_CURRENT_SOURCE_DIR}/api/api.cc ${CMAKE_CURRENT_SOURCE_DIR}/api/api_impl.cc
${CMAKE_CURRENT_SOURCE_DIR}/api/analysis_predictor.cc
${mkldnn_quantizer_src}
${CMAKE_CURRENT_SOURCE_DIR}/api/details/zero_copy_tensor.cc)
${CMAKE_CURRENT_SOURCE_DIR}/api/details/zero_copy_tensor.cc
${ANAKIN_SHARED_INFERENCE_SRCS})

if(WIN32)
sep_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_APIS} zero_copy_tensor reset_tensor_array
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/inference/anakin/convert/elementwise.cc
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ void ElementwiseMulOpConverter<TargetT, PrecisionT>::operator()(
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();

this->engine_->AddOp(op_name, "Eltwise", {x_name, y_name}, {out_name});
std::string elementwise_type = "Prod";
std::string elementwise_type = "Mul";
this->engine_->template AddOpAttr<std::string>(op_name, "type",
elementwise_type);
std::vector<float> coeff = {1.0, 1.0};
Expand Down
27 changes: 16 additions & 11 deletions paddle/fluid/inference/anakin/convert/op_converter.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,11 +153,12 @@ template class AnakinOpConverter<::anakin::saber::NV,
::anakin::Precision::FP32>;
template class AnakinOpConverter<::anakin::saber::NV,
::anakin::Precision::INT8>;

#ifdef ANAKIN_X86_PLACE
template class AnakinOpConverter<::anakin::saber::X86,
::anakin::Precision::FP32>;
template class AnakinOpConverter<::anakin::saber::X86,
::anakin::Precision::INT8>;
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
Expand Down Expand Up @@ -203,16 +204,16 @@ template class AnakinOpConverter<::anakin::saber::X86,
CPU, ::anakin::saber::X86, precision_type__, \
::anakin::Precision::precision_type__)

#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && defined(ANAKIN_X86_PLACE)
#define REGISTER_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__, FP32); \
REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__, INT8); \
REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__, FP32); \
REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__, INT8)
#else
#define REGISTER_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__, FP32); \
REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__, INT8)
#elif defined(PADDLE_WITH_CUDA)
#define REGISTER_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__, FP32); \
REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__, INT8)
#endif

#define USE_ANAKIN_CONVERTER_BASE(op_type__, place_type__, precision_type__) \
Expand All @@ -221,12 +222,16 @@ template class AnakinOpConverter<::anakin::saber::X86,
__attribute__((unused)) = \
Touch_anakin_##op_type__##_##place_type__##_##precision_type__();

#if defined(PADDLE_WITH_CUDA) && defined(ANAKIN_X86_PLACE)
#define USE_ANAKIN_CONVERTER(op_type__) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA, FP32) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CPU, FP32)
#define USE_INT8_ANAKIN_CONVERTER(op_type__) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA, INT8) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CPU, INT8)
#elif defined(PADDLE_WITH_CUDA)
#define USE_ANAKIN_CONVERTER(op_type__) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA, FP32)
#define USE_INT8_ANAKIN_CONVERTER(op_type__) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA, INT8)

#define USE_CPU_ANAKIN_CONVERTER(op_type__) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CPU, FP32)
#define USE_CPU_INT8_ANAKIN_CONVERTER(op_type__) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CPU, INT8)
#endif
32 changes: 0 additions & 32 deletions paddle/fluid/inference/anakin/convert/test_activation_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -77,32 +77,6 @@ TEST(swish_op, gpu) {
}
#endif

/*
TEST(sigm_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_activation_op<::anakin::saber::X86>("sigmoid", ctx, false);
}

TEST(tanh_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_activation_op<::anakin::saber::X86>("tanh", ctx, false);
}

TEST(relu6_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_activation_op<::anakin::saber::X86>("relu6", ctx, false);
}

TEST(swish_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_activation_op<::anakin::saber::X86>("swish", ctx, false);
}
*/

} // namespace anakin
} // namespace inference
} // namespace paddle
Expand All @@ -112,13 +86,7 @@ USE_OP(tanh);
USE_OP(relu6);
USE_OP(swish);

USE_CPU_ANAKIN_CONVERTER(sigmoid);
USE_CPU_ANAKIN_CONVERTER(tanh);
USE_CPU_ANAKIN_CONVERTER(relu6);
USE_CPU_ANAKIN_CONVERTER(swish);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(sigmoid);
USE_ANAKIN_CONVERTER(tanh);
USE_ANAKIN_CONVERTER(relu6);
USE_ANAKIN_CONVERTER(swish);
#endif
Original file line number Diff line number Diff line change
Expand Up @@ -57,19 +57,16 @@ TEST(affine_channel_op, gpu) {
test_affine_channel_op<::anakin::saber::NV>(ctx, true);
}
#endif

#ifdef ANAKIN_X86_PLACE
TEST(affine_channel_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_affine_channel_op<::anakin::saber::X86>(ctx, false);
}

#endif
} // namespace anakin
} // namespace inference
} // namespace paddle

USE_OP(affine_channel);
USE_CPU_ANAKIN_CONVERTER(affine_channel);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(affine_channel);
#endif
8 changes: 2 additions & 6 deletions paddle/fluid/inference/anakin/convert/test_batch_norm_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,19 +73,15 @@ TEST(batch_norm_op, gpu) {
test_batchnorm_op<::anakin::saber::NV>(ctx, true);
}
#endif

#ifdef ANAKIN_X86_PLACE
TEST(batch_norm_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_batchnorm_op<::anakin::saber::X86>(ctx, false);
}

#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(batch_norm);
USE_CPU_ANAKIN_CONVERTER(batch_norm);

#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(batch_norm);
#endif
8 changes: 2 additions & 6 deletions paddle/fluid/inference/anakin/convert/test_concat_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,19 +53,15 @@ TEST(concat_op, gpu) {
test_concat_op<::anakin::saber::NV>(ctx, true);
}
#endif

#ifdef ANAKIN_X86_PLACE
TEST(concat_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_concat_op<::anakin::saber::X86>(ctx, false);
}

#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(concat);
USE_CPU_ANAKIN_CONVERTER(concat);

#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(concat);
#endif
8 changes: 2 additions & 6 deletions paddle/fluid/inference/anakin/convert/test_conv2d_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -60,20 +60,16 @@ TEST(conv2d_op, gpu) {
test_conv2d_op<::anakin::saber::NV>(ctx, true);
}
#endif

#ifdef ANAKIN_X86_PLACE
TEST(conv2d_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_conv2d_op<::anakin::saber::X86>(ctx, false);
}

#endif
} // namespace anakin
} // namespace inference
} // namespace paddle

USE_OP(conv2d);
USE_CPU_ANAKIN_CONVERTER(conv2d);

#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(conv2d);
#endif
7 changes: 2 additions & 5 deletions paddle/fluid/inference/anakin/convert/test_dropout_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -54,19 +54,16 @@ TEST(dropout_op, gpu) {
test_dropout_op<::anakin::saber::NV>(ctx, true);
}
#endif

#ifdef ANAKIN_X86_PLACE
TEST(dropout_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_dropout_op<::anakin::saber::X86>(ctx, false);
}

#endif
} // namespace anakin
} // namespace inference
} // namespace paddle

USE_OP(dropout);
USE_CPU_ANAKIN_CONVERTER(dropout);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(dropout);
#endif
10 changes: 2 additions & 8 deletions paddle/fluid/inference/anakin/convert/test_elementwise_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -59,29 +59,23 @@ TEST(elementwise_op, native_mul_gpu) {
test_elementwise_op<::anakin::saber::NV>("elementwise_mul", ctx, true);
}
#endif

#ifdef ANAKIN_X86_PLACE
TEST(elementwise_op, native_add_cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_elementwise_op<::anakin::saber::X86>("elementwise_add", ctx, false);
}

TEST(elementwise_op, native_mul_cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_elementwise_op<::anakin::saber::X86>("elementwise_mul", ctx, false);
}

#endif
} // namespace anakin
} // namespace inference
} // namespace paddle

USE_OP(elementwise_add);
USE_OP(elementwise_mul);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(elementwise_add);
USE_ANAKIN_CONVERTER(elementwise_mul);
#endif

USE_CPU_ANAKIN_CONVERTER(elementwise_add);
USE_CPU_ANAKIN_CONVERTER(elementwise_mul);
7 changes: 2 additions & 5 deletions paddle/fluid/inference/anakin/convert/test_fc_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,19 +49,16 @@ TEST(mul_op, gpu) {
test_mul_op<::anakin::saber::NV>(ctx, true);
}
#endif

#ifdef ANAKIN_X86_PLACE
TEST(mul_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_mul_op<::anakin::saber::X86>(ctx, false);
}

#endif
} // namespace anakin
} // namespace inference
} // namespace paddle

USE_OP(mul);
USE_CPU_ANAKIN_CONVERTER(fc);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(fc);
#endif
7 changes: 2 additions & 5 deletions paddle/fluid/inference/anakin/convert/test_flatten_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,20 +48,17 @@ TEST(flatten_op, gpu) {
test_flatten_op<::anakin::saber::NV>(ctx, true);
}
#endif

#ifdef ANAKIN_X86_PLACE
TEST(flatten_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_flatten_op<::anakin::saber::X86>(ctx, false);
}

#endif
} // namespace anakin
} // namespace inference
} // namespace paddle

USE_OP(reshape);
USE_OP_ITSELF(flatten);
USE_CPU_ANAKIN_CONVERTER(flatten);
#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(flatten);
#endif
8 changes: 2 additions & 6 deletions paddle/fluid/inference/anakin/convert/test_pool2d_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ TEST(Pool2dOpConverter, avg_ceil_test) {
test_pool2d<::anakin::saber::NV>(ctx, true, false, true, "avg");
}
#endif

#ifdef ANAKIN_X86_PLACE
TEST(Pool2dOpConverter, normal_cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
Expand All @@ -110,14 +110,10 @@ TEST(Pool2dOpConverter, avg_ceil_test_cpu) {
platform::CPUDeviceContext ctx(cpu_place);
test_pool2d<::anakin::saber::X86>(ctx, false, false, true, "avg");
}

#endif
} // namespace anakin
} // namespace inference
} // namespace paddle

USE_OP(pool2d);
USE_CPU_ANAKIN_CONVERTER(pool2d);

#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(pool2d);
#endif
5 changes: 0 additions & 5 deletions paddle/fluid/inference/anakin/convert/test_relu_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,5 @@ TEST(leaky_relu_op, gpu) {

USE_OP(relu);
USE_OP(leaky_relu);
USE_CPU_ANAKIN_CONVERTER(relu);
USE_CPU_ANAKIN_CONVERTER(leaky_relu);

#ifdef PADDLE_WITH_CUDA
USE_ANAKIN_CONVERTER(relu);
USE_ANAKIN_CONVERTER(leaky_relu);
#endif
Loading