Skip to content

Commit 10e5304

Browse files
authored
[Inference] Support NNAdapter and ascend310 (#35226)
1 parent 5cda6b2 commit 10e5304

File tree

15 files changed

+310
-9
lines changed

15 files changed

+310
-9
lines changed

cmake/external/lite.cmake

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,14 @@ if (LITE_WITH_XPU)
3535
ENDIF()
3636
endif()
3737

38+
if (LITE_WITH_NNADAPTER)
39+
add_definitions(-DLITE_SUBGRAPH_WITH_NNADAPTER)
40+
if (NNADAPTER_WITH_HUAWEI_ASCEND_NPU)
41+
add_definitions(-DLITE_SUBGRAPH_WITH_NPU)
42+
set(NPU_SDK_ROOT "/usr/local/Ascend/ascend-toolkit/latest" CACHE STRING "default NPU SDK ROOT")
43+
endif()
44+
endif()
45+
3846
if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
3947
include(ExternalProject)
4048
set(LITE_PROJECT extern_lite)
@@ -67,6 +75,9 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
6775
-DLITE_WITH_XPU=${LITE_WITH_XPU}
6876
-DXPU_SDK_URL=${XPU_BASE_URL}
6977
-DXPU_SDK_ENV=${XPU_SDK_ENV}
78+
-DLITE_WITH_NNADAPTER=${LITE_WITH_NNADAPTER}
79+
-DNNADAPTER_WITH_HUAWEI_ASCEND_NPU=${NNADAPTER_WITH_HUAWEI_ASCEND_NPU}
80+
-DNNADAPTER_HUAWEI_ASCEND_NPU_SDK_ROOT=${NPU_SDK_ROOT}
7081
-DLITE_WITH_CODE_META_INFO=OFF
7182
-DLITE_WITH_ARM=ON)
7283
ExternalProject_Add(
@@ -110,6 +121,9 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
110121
-DLITE_WITH_XPU=${LITE_WITH_XPU}
111122
-DXPU_SDK_URL=${XPU_BASE_URL}
112123
-DXPU_SDK_ENV=${XPU_SDK_ENV}
124+
-DLITE_WITH_NNADAPTER=${LITE_WITH_NNADAPTER}
125+
-DNNADAPTER_WITH_HUAWEI_ASCEND_NPU=${NNADAPTER_WITH_HUAWEI_ASCEND_NPU}
126+
-DNNADAPTER_HUAWEI_ASCEND_NPU_SDK_ROOT=${NPU_SDK_ROOT}
113127
-DLITE_WITH_CODE_META_INFO=OFF
114128
-DLITE_WITH_ARM=OFF)
115129

@@ -120,6 +134,7 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
120134
GIT_TAG ${LITE_GIT_TAG}
121135
PREFIX ${LITE_SOURCES_DIR}
122136
UPDATE_COMMAND ""
137+
PATCH_COMMAND sed -i "s?NNadapter_bridges_path = os.path.abspath('..')+\"\/lite\/kernels\/nnadapter\/bridges\/paddle_use_bridges.h\"?NNadapter_bridges_path = os.path.abspath(\'..\')+\"\/extern_lite\/lite\/kernels\/nnadapter\/bridges\/paddle_use_bridges.h\"?" ${LITE_SOURCES_DIR}/src/extern_lite//lite/tools/cmake_tools/record_supported_kernel_op.py && sed -i "/general::ssa::ConvertToSSA(cpp_prog)$<SEMICOLON>/d" ${LITE_SOURCES_DIR}/src/extern_lite/lite/model_parser/model_parser.cc
123138
BUILD_COMMAND ${LITE_BUILD_COMMAND}
124139
INSTALL_COMMAND ""
125140
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
@@ -146,6 +161,11 @@ endif()
146161
if (WITH_ARM)
147162
if(LITE_WITH_XPU)
148163
set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8.xpu)
164+
elseif(LITE_WITH_NNADAPTER)
165+
message("Enable LITE_WITH_NNADAPTER")
166+
if (NNADAPTER_WITH_HUAWEI_ASCEND_NPU)
167+
set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8.nnadapter)
168+
endif()
149169
else()
150170
set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8)
151171
endif()
@@ -174,5 +194,16 @@ endfunction()
174194
external_lite_libs(lite_full_static ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_full_api_shared.so)
175195
set(LITE_SHARED_LIB ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_full_api_shared.so)
176196

197+
if (LITE_WITH_NNADAPTER)
198+
set(LITE_NNADAPTER_LIB ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter.so)
199+
if (NNADAPTER_WITH_HUAWEI_ASCEND_NPU)
200+
external_lite_libs(lite_nnadapter ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter.so ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter_driver_huawei_ascend_npu.so)
201+
set(LITE_DEPS lite_full_static lite_nnadapter)
202+
set(LITE_NNADAPTER_NPU_LIB ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter_driver_huawei_ascend_npu.so)
203+
endif()
204+
else()
205+
set(LITE_DEPS lite_full_static)
206+
endif()
207+
177208
add_definitions(-DPADDLE_WITH_LITE)
178209
add_definitions(-DLITE_WITH_LOG)

paddle/fluid/inference/analysis/argument.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,22 @@ struct Argument {
239239
DECL_ARGUMENT_FIELD(xpu_precision, XpuPrecision, std::string);
240240
DECL_ARGUMENT_FIELD(xpu_adaptive_seqlen, XpuAdaptiveSeqlen, bool);
241241

242+
DECL_ARGUMENT_FIELD(use_nnadapter, UseNNAdapter, bool);
243+
DECL_ARGUMENT_FIELD(nnadapter_model_cache_dir, NNAdapterModelCacheDir,
244+
std::string);
245+
DECL_ARGUMENT_FIELD(nnadapter_device_names, NNAdapterDeviceNames,
246+
std::vector<std::string>);
247+
DECL_ARGUMENT_FIELD(nnadapter_context_properties, NNAdapterContextProperties,
248+
std::string);
249+
DECL_ARGUMENT_FIELD(nnadapter_subgraph_partition_config_buffer,
250+
NNAdapterSubgraphPartitionConfigBuffer, std::string);
251+
DECL_ARGUMENT_FIELD(nnadapter_subgraph_partition_config_path,
252+
NNAdapterSubgraphPartitionConfigPath, std::string);
253+
DECL_ARGUMENT_FIELD(nnadapter_model_cache_token, NNAdapterModelCacheToken,
254+
std::vector<std::string>);
255+
DECL_ARGUMENT_FIELD(nnadapter_model_cache_buffer, NNAdapterModelCacheBuffer,
256+
std::vector<std::vector<char>>);
257+
242258
// Memory optimized related.
243259
DECL_ARGUMENT_FIELD(enable_memory_optim, EnableMemoryOptim, bool);
244260

paddle/fluid/inference/analysis/ir_pass_manager.cc

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,27 @@ void IRPassManager::CreatePasses(Argument *argument,
202202
new std::string(argument->xpu_autotune_file()));
203203
pass->Set("precision", new std::string(argument->xpu_precision()));
204204
pass->Set("adaptive_seqlen", new bool(argument->xpu_adaptive_seqlen()));
205+
// NNAdapter Related
206+
pass->Set("use_nnadapter", new bool(argument->use_nnadapter()));
207+
pass->Set("nnadapter_model_cache_dir",
208+
new std::string(argument->nnadapter_model_cache_dir()));
209+
pass->Set(
210+
"nnadapter_device_names",
211+
new std::vector<std::string>(argument->nnadapter_device_names()));
212+
pass->Set("nnadapter_context_properties",
213+
new std::string(argument->nnadapter_context_properties()));
214+
pass->Set("nnadapter_subgraph_partition_config_buffer",
215+
new std::string(
216+
argument->nnadapter_subgraph_partition_config_buffer()));
217+
pass->Set("nnadapter_subgraph_partition_config_path",
218+
new std::string(
219+
argument->nnadapter_subgraph_partition_config_path()));
220+
pass->Set("nnadapter_model_cache_buffer",
221+
new std::vector<std::vector<char>>(
222+
argument->nnadapter_model_cache_buffer()));
223+
pass->Set("nnadapter_model_cache_token",
224+
new std::vector<std::string>(
225+
argument->nnadapter_model_cache_token()));
205226
}
206227
disable_logs_ = argument->disable_logs();
207228
if (pass_name == "fc_fuse_pass") {

paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,12 +250,30 @@ void LiteSubgraphPass::SetUpEngine(
250250
std::string autotune_file = Get<std::string>("autotune_file");
251251
std::string precision = Get<std::string>("precision");
252252
bool adaptive_seqlen = Get<bool>("adaptive_seqlen");
253+
// NNAdapter Related
254+
bool use_nnadapter = Get<bool>("use_nnadapter");
255+
std::string nnadapter_model_cache_dir =
256+
Get<std::string>("nnadapter_model_cache_dir");
257+
auto nnadapter_device_names =
258+
Get<std::vector<std::string>>("nnadapter_device_names");
259+
std::string nnadapter_context_properties =
260+
Get<std::string>("nnadapter_context_properties");
261+
std::string nnadapter_subgraph_partition_config_buffer =
262+
Get<std::string>("nnadapter_subgraph_partition_config_buffer");
263+
std::string nnadapter_subgraph_partition_config_path =
264+
Get<std::string>("nnadapter_subgraph_partition_config_path");
265+
auto nnadapter_model_cache_buffer =
266+
Get<std::vector<std::vector<char>>>("nnadapter_model_cache_buffer");
267+
auto nnadapter_model_cache_token =
268+
Get<std::vector<std::string>>("nnadapter_model_cache_token");
253269

254270
lite_api::TargetType target_type;
255271
if (use_gpu) {
256272
target_type = TARGET(kCUDA);
257273
} else if (use_xpu) {
258274
target_type = TARGET(kXPU);
275+
} else if (use_nnadapter) {
276+
target_type = TARGET(kNNAdapter);
259277
} else {
260278
#ifdef PADDLE_WITH_ARM
261279
target_type = TARGET(kARM);
@@ -292,6 +310,17 @@ void LiteSubgraphPass::SetUpEngine(
292310
config.autotune_file = autotune_file;
293311
config.precision = precision;
294312
config.adaptive_seqlen = adaptive_seqlen;
313+
// NNAdapter Related
314+
config.nnadapter_model_cache_dir = nnadapter_model_cache_dir;
315+
config.nnadapter_device_names = nnadapter_device_names;
316+
config.nnadapter_context_properties = nnadapter_context_properties;
317+
config.nnadapter_subgraph_partition_config_buffer =
318+
nnadapter_subgraph_partition_config_buffer;
319+
config.nnadapter_subgraph_partition_config_path =
320+
nnadapter_subgraph_partition_config_path;
321+
config.nnadapter_model_cache_buffer = nnadapter_model_cache_buffer;
322+
config.nnadapter_model_cache_token = nnadapter_model_cache_token;
323+
295324
if (dump_model) {
296325
lite::StrToBinaryFile("./model.bin", config.model);
297326
lite::StrToBinaryFile("./param.bin", config.param);

paddle/fluid/inference/api/analysis_config.cc

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
207207
// NPU related.
208208
CP_MEMBER(use_npu_);
209209
CP_MEMBER(npu_device_id_);
210+
CP_MEMBER(nnadapter_config_);
210211

211212
// profile related.
212213
CP_MEMBER(with_profile_);
@@ -554,7 +555,7 @@ void AnalysisConfig::Update() {
554555
}
555556

556557
if (use_npu_) {
557-
#ifdef PADDLE_WITH_ASCEND_CL
558+
#if defined(PADDLE_WITH_ASCEND_CL) || defined(LITE_SUBGRAPH_WITH_NPU)
558559
PADDLE_ENFORCE_EQ(use_gpu_, false,
559560
platform::errors::Unavailable(
560561
"Currently, NPU and GPU cannot be enabled in the "
@@ -833,6 +834,61 @@ std::string AnalysisConfig::Summary() {
833834
return os.PrintTable();
834835
}
835836

837+
LiteNNAdapterConfig &LiteNNAdapterConfig::SetDeviceNames(
838+
const std::vector<std::string> &names) {
839+
nnadapter_device_names = names;
840+
return *this;
841+
}
842+
843+
LiteNNAdapterConfig &LiteNNAdapterConfig::SetContextProperties(
844+
const std::string &properties) {
845+
nnadapter_context_properties = properties;
846+
return *this;
847+
}
848+
849+
LiteNNAdapterConfig &LiteNNAdapterConfig::SetModelCacheDir(
850+
const std::string &dir) {
851+
nnadapter_model_cache_dir = dir;
852+
return *this;
853+
}
854+
855+
LiteNNAdapterConfig &LiteNNAdapterConfig::SetModelCacheBuffers(
856+
const std::string &model_cache_token,
857+
const std::vector<char> &model_cache_buffer) {
858+
PADDLE_ENFORCE_EQ(model_cache_token.empty(), false,
859+
platform::errors::InvalidArgument(
860+
"model_cache_token should not be empty."));
861+
PADDLE_ENFORCE_EQ(model_cache_buffer.empty(), false,
862+
platform::errors::InvalidArgument(
863+
"model_cache_buffer should not be empty."));
864+
PADDLE_ENFORCE_EQ(nnadapter_model_cache_buffers.count(model_cache_token),
865+
false, platform::errors::InvalidArgument(
866+
"model_cache_token has already been set."));
867+
868+
nnadapter_model_cache_buffers[model_cache_token] = model_cache_buffer;
869+
return *this;
870+
}
871+
872+
LiteNNAdapterConfig &LiteNNAdapterConfig::SetSubgraphPartitionConfigPath(
873+
const std::string &path) {
874+
nnadapter_subgraph_partition_config_path = path;
875+
return *this;
876+
}
877+
878+
LiteNNAdapterConfig &LiteNNAdapterConfig::SetSubgraphPartitionConfigBuffer(
879+
const std::string &buffer) {
880+
nnadapter_subgraph_partition_config_buffer = buffer;
881+
return *this;
882+
}
883+
LiteNNAdapterConfig &LiteNNAdapterConfig::Enable() {
884+
use_nnadapter = true;
885+
return *this;
886+
}
887+
LiteNNAdapterConfig &LiteNNAdapterConfig::Disable() {
888+
use_nnadapter = false;
889+
return *this;
890+
}
891+
836892
void AnalysisConfig::CollectShapeRangeInfo(
837893
const std::string &shape_range_info_path) {
838894
LOG(INFO) << "In CollectShapeInfo mode, we will disable optimizations and "

paddle/fluid/inference/api/analysis_predictor.cc

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,22 @@ bool AnalysisPredictor::CreateExecutor() {
276276
"You tried to use NPU forward propagation, but Paddle was not compiled "
277277
"with WITH_ASCEND_CL."));
278278
#endif
279+
} else if (config_.NNAdapter().use_nnadapter) {
280+
if (config_.lite_engine_enabled()) {
281+
place_ = paddle::platform::CPUPlace();
282+
#ifndef LITE_SUBGRAPH_WITH_NNADAPTER
283+
PADDLE_THROW(
284+
platform::errors::Unavailable("You tried to use an NNAdapter lite "
285+
"engine, but Paddle was not compiled "
286+
"with it."));
287+
#endif // LITE_SUBGRAPH_WITH_NNADAPTER
288+
} else {
289+
PADDLE_THROW(
290+
platform::errors::Unavailable("You tried to use NNadapter forward "
291+
"propagation (inference without lite "
292+
"engine), but Paddle was not compiled "
293+
"with LITE_WITH_NNADAPTER."));
294+
}
279295
} else {
280296
place_ = paddle::platform::CPUPlace();
281297
}
@@ -601,6 +617,26 @@ void AnalysisPredictor::PrepareArgument() {
601617
argument_.SetXpuAutotuneFile(config_.xpu_autotune_file_);
602618
argument_.SetXpuPrecision(config_.xpu_precision_);
603619
argument_.SetXpuAdaptiveSeqlen(config_.xpu_adaptive_seqlen_);
620+
// NNAdapter related
621+
argument_.SetUseNNAdapter(config_.NNAdapter().use_nnadapter);
622+
argument_.SetNNAdapterDeviceNames(
623+
config_.NNAdapter().nnadapter_device_names);
624+
argument_.SetNNAdapterContextProperties(
625+
config_.NNAdapter().nnadapter_context_properties);
626+
argument_.SetNNAdapterModelCacheDir(
627+
config_.NNAdapter().nnadapter_model_cache_dir);
628+
argument_.SetNNAdapterSubgraphPartitionConfigBuffer(
629+
config_.NNAdapter().nnadapter_subgraph_partition_config_buffer);
630+
argument_.SetNNAdapterSubgraphPartitionConfigPath(
631+
config_.NNAdapter().nnadapter_subgraph_partition_config_path);
632+
std::vector<std::string> buffer_keys;
633+
std::vector<std::vector<char>> buffer_vals;
634+
for (auto it : config_.NNAdapter().nnadapter_model_cache_buffers) {
635+
buffer_keys.emplace_back(it.first);
636+
buffer_vals.emplace_back(it.second);
637+
}
638+
argument_.SetNNAdapterModelCacheToken(buffer_keys);
639+
argument_.SetNNAdapterModelCacheBuffer(buffer_vals);
604640
LOG(INFO) << "Lite subgraph engine is enabled";
605641
}
606642

paddle/fluid/inference/api/analysis_predictor_tester.cc

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,26 @@ TEST(AnalysisPredictor, analysis_off) {
6161
ASSERT_TRUE(predictor->Run(inputs, &outputs));
6262
}
6363

64+
#ifndef WIN32
65+
TEST(AnalysisPredictor, lite_nn_adapter_npu) {
66+
AnalysisConfig config;
67+
config.SetModel(FLAGS_dirname);
68+
config.EnableLiteEngine();
69+
config.NNAdapter()
70+
.Disable()
71+
.Enable()
72+
.SetDeviceNames({"huawei_ascend_npu"})
73+
.SetContextProperties("HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS=0")
74+
.SetModelCacheDir("cache_dirr")
75+
.SetSubgraphPartitionConfigPath("")
76+
.SetModelCacheBuffers("c1", {'c'});
77+
#ifndef LITE_SUBGRAPH_WITH_NNADAPTER
78+
EXPECT_THROW(CreatePaddlePredictor<AnalysisConfig>(config),
79+
paddle::platform::EnforceNotMet);
80+
#endif
81+
}
82+
#endif
83+
6484
TEST(AnalysisPredictor, analysis_on) {
6585
AnalysisConfig config;
6686
config.SetModel(FLAGS_dirname);

paddle/fluid/inference/api/paddle_analysis_config.h

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,34 @@ namespace paddle {
4848
class AnalysisPredictor;
4949
struct MkldnnQuantizerConfig;
5050

51+
struct LiteNNAdapterConfig {
52+
bool use_nnadapter{false};
53+
std::string nnadapter_model_cache_dir;
54+
std::map<std::string, std::vector<char>> nnadapter_model_cache_buffers;
55+
std::vector<std::string> nnadapter_device_names;
56+
std::string nnadapter_context_properties;
57+
std::string nnadapter_subgraph_partition_config_path;
58+
std::string nnadapter_subgraph_partition_config_buffer;
59+
60+
LiteNNAdapterConfig& SetDeviceNames(const std::vector<std::string>& names);
61+
62+
LiteNNAdapterConfig& SetContextProperties(const std::string& properties);
63+
64+
LiteNNAdapterConfig& SetModelCacheDir(const std::string& dir);
65+
66+
LiteNNAdapterConfig& SetModelCacheBuffers(
67+
const std::string& model_cache_token,
68+
const std::vector<char>& model_cache_buffer);
69+
70+
LiteNNAdapterConfig& SetSubgraphPartitionConfigPath(const std::string& path);
71+
72+
LiteNNAdapterConfig& SetSubgraphPartitionConfigBuffer(
73+
const std::string& buffer);
74+
75+
LiteNNAdapterConfig& Enable();
76+
LiteNNAdapterConfig& Disable();
77+
};
78+
5179
///
5280
/// \brief configuration manager for AnalysisPredictor.
5381
/// \since 1.7.0
@@ -692,6 +720,8 @@ struct PD_INFER_DECL AnalysisConfig {
692720
///
693721
std::string Summary();
694722

723+
LiteNNAdapterConfig& NNAdapter() { return nnadapter_config_; }
724+
695725
protected:
696726
// Update the config.
697727
void Update();
@@ -800,6 +830,9 @@ struct PD_INFER_DECL AnalysisConfig {
800830
std::string xpu_precision_;
801831
bool xpu_adaptive_seqlen_;
802832

833+
// NNAdapter related
834+
LiteNNAdapterConfig nnadapter_config_;
835+
803836
// mkldnn related.
804837
int mkldnn_cache_capacity_{10};
805838
bool use_mkldnn_quantizer_{false};

paddle/fluid/inference/lite/CMakeLists.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@ if(XPU_SDK_ROOT)
22
set(XPU_DEPS xpuapi xpurt)
33
endif()
44

5-
cc_library(lite_op_teller SRCS op_teller.cc DEPS lite_full_static framework_proto device_context boost xxhash)
6-
cc_library(lite_engine SRCS engine.cc DEPS lite_full_static framework_proto ${XPU_DEPS})
7-
cc_library(lite_tensor_utils SRCS tensor_utils.cc DEPS memcpy lite_full_static framework_proto boost device_context ${XPU_DEPS})
5+
cc_library(lite_op_teller SRCS op_teller.cc DEPS ${LITE_DEPS} framework_proto device_context boost xxhash)
6+
cc_library(lite_engine SRCS engine.cc DEPS ${LITE_DEPS} framework_proto ${XPU_DEPS})
7+
cc_library(lite_tensor_utils SRCS tensor_utils.cc DEPS memcpy ${LITE_DEPS} framework_proto boost device_context ${XPU_DEPS})
88
cc_test(test_lite_engine SRCS test_engine_lite.cc DEPS lite_engine protobuf framework_proto glog gtest analysis)
99
cc_test(test_lite_tensor_utils SRCS test_tensor_utils.cc DEPS lite_engine lite_tensor_utils)

0 commit comments

Comments (0)