Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 14 additions & 23 deletions fastdeploy/pybind/runtime.cc
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,17 @@ void BindRuntime(pybind11::module& m) {
.def("use_openvino_backend", &RuntimeOption::UseOpenVINOBackend)
.def("use_lite_backend", &RuntimeOption::UseLiteBackend)
.def("set_lite_device_names", &RuntimeOption::SetLiteDeviceNames)
.def("set_lite_context_properties", &RuntimeOption::SetLiteContextProperties)
.def("set_lite_context_properties",
&RuntimeOption::SetLiteContextProperties)
.def("set_lite_model_cache_dir", &RuntimeOption::SetLiteModelCacheDir)
.def("set_lite_dynamic_shape_info", &RuntimeOption::SetLiteDynamicShapeInfo)
.def("set_lite_subgraph_partition_path", &RuntimeOption::SetLiteSubgraphPartitionPath)
.def("set_lite_mixed_precision_quantization_config_path", &RuntimeOption::SetLiteMixedPrecisionQuantizationConfigPath)
.def("set_lite_subgraph_partition_config_buffer", &RuntimeOption::SetLiteSubgraphPartitionConfigBuffer)
.def("set_lite_dynamic_shape_info",
&RuntimeOption::SetLiteDynamicShapeInfo)
.def("set_lite_subgraph_partition_path",
&RuntimeOption::SetLiteSubgraphPartitionPath)
.def("set_lite_mixed_precision_quantization_config_path",
&RuntimeOption::SetLiteMixedPrecisionQuantizationConfigPath)
.def("set_lite_subgraph_partition_config_buffer",
&RuntimeOption::SetLiteSubgraphPartitionConfigBuffer)
.def("set_paddle_mkldnn", &RuntimeOption::SetPaddleMKLDNN)
.def("set_openvino_device", &RuntimeOption::SetOpenVINODevice)
.def("set_openvino_shape_info", &RuntimeOption::SetOpenVINOShapeInfo)
Expand Down Expand Up @@ -114,21 +119,7 @@ void BindRuntime(pybind11::module& m) {
.def_readwrite("ipu_available_memory_proportion",
&RuntimeOption::ipu_available_memory_proportion)
.def_readwrite("ipu_enable_half_partial",
&RuntimeOption::ipu_enable_half_partial)
.def_readwrite("kunlunxin_l3_workspace_size",
&RuntimeOption::kunlunxin_l3_workspace_size)
.def_readwrite("kunlunxin_locked",
&RuntimeOption::kunlunxin_locked)
.def_readwrite("kunlunxin_autotune",
&RuntimeOption::kunlunxin_autotune)
.def_readwrite("kunlunxin_autotune_file",
&RuntimeOption::kunlunxin_autotune_file)
.def_readwrite("kunlunxin_precision",
&RuntimeOption::kunlunxin_precision)
.def_readwrite("kunlunxin_adaptive_seqlen",
&RuntimeOption::kunlunxin_adaptive_seqlen)
.def_readwrite("kunlunxin_enable_multi_stream",
&RuntimeOption::kunlunxin_enable_multi_stream);
&RuntimeOption::ipu_enable_half_partial);

pybind11::class_<TensorInfo>(m, "TensorInfo")
.def_readwrite("name", &TensorInfo::name)
Expand All @@ -151,9 +142,9 @@ void BindRuntime(pybind11::module& m) {
auto dtype =
NumpyDataTypeToFDDataType(warm_datas[i][j].dtype());
std::vector<int64_t> data_shape;
data_shape.insert(data_shape.begin(), warm_datas[i][j].shape(),
warm_datas[i][j].shape() +
warm_datas[i][j].ndim());
data_shape.insert(
data_shape.begin(), warm_datas[i][j].shape(),
warm_datas[i][j].shape() + warm_datas[i][j].ndim());
warm_tensors[i][j].Resize(data_shape, dtype);
memcpy(warm_tensors[i][j].MutableData(),
warm_datas[i][j].mutable_data(),
Expand Down
33 changes: 1 addition & 32 deletions fastdeploy/runtime/runtime.cc
Original file line number Diff line number Diff line change
Expand Up @@ -390,43 +390,12 @@ void Runtime::CreateTrtBackend() {

void Runtime::CreateLiteBackend() {
#ifdef ENABLE_LITE_BACKEND
auto lite_option = LiteBackendOption();
lite_option.threads = option.cpu_thread_num;
lite_option.enable_int8 = option.lite_enable_int8;
lite_option.enable_fp16 = option.lite_enable_fp16;
lite_option.power_mode = static_cast<int>(option.lite_power_mode);
lite_option.optimized_model_dir = option.lite_optimized_model_dir;
lite_option.nnadapter_subgraph_partition_config_path =
option.lite_nnadapter_subgraph_partition_config_path;
lite_option.nnadapter_subgraph_partition_config_buffer =
option.lite_nnadapter_subgraph_partition_config_buffer;
lite_option.nnadapter_device_names = option.lite_nnadapter_device_names;
lite_option.nnadapter_context_properties =
option.lite_nnadapter_context_properties;
lite_option.nnadapter_model_cache_dir = option.lite_nnadapter_model_cache_dir;
lite_option.nnadapter_dynamic_shape_info =
option.lite_nnadapter_dynamic_shape_info;
lite_option.nnadapter_mixed_precision_quantization_config_path =
option.lite_nnadapter_mixed_precision_quantization_config_path;
lite_option.enable_timvx = option.enable_timvx;
lite_option.enable_ascend = option.enable_ascend;
lite_option.enable_kunlunxin = option.enable_kunlunxin;
lite_option.device_id = option.device_id;
lite_option.kunlunxin_l3_workspace_size = option.kunlunxin_l3_workspace_size;
lite_option.kunlunxin_locked = option.kunlunxin_locked;
lite_option.kunlunxin_autotune = option.kunlunxin_autotune;
lite_option.kunlunxin_autotune_file = option.kunlunxin_autotune_file;
lite_option.kunlunxin_precision = option.kunlunxin_precision;
lite_option.kunlunxin_adaptive_seqlen = option.kunlunxin_adaptive_seqlen;
lite_option.kunlunxin_enable_multi_stream =
option.kunlunxin_enable_multi_stream;

FDASSERT(option.model_format == ModelFormat::PADDLE,
"LiteBackend only support model format of ModelFormat::PADDLE");
backend_ = utils::make_unique<LiteBackend>();
auto casted_backend = dynamic_cast<LiteBackend*>(backend_.get());
FDASSERT(casted_backend->InitFromPaddle(option.model_file, option.params_file,
lite_option),
option.paddle_lite_option),
"Load model from nb file failed while initializing LiteBackend.");
#else
FDASSERT(false,
Expand Down
57 changes: 33 additions & 24 deletions fastdeploy/runtime/runtime_option.cc
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@ void RuntimeOption::UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name,
}

// Selects TIM-VX as the target: sets `device` to Device::TIMVX and raises the
// enable_timvx flag on the Paddle Lite option struct.
// NOTE(review): the unqualified `enable_timvx = true;` and the
// `paddle_lite_option.enable_timvx = true;` line both appear in this text;
// presumably one is the pre-refactor form — confirm which the real file keeps.
void RuntimeOption::UseTimVX() {
enable_timvx = true;
device = Device::TIMVX;
paddle_lite_option.enable_timvx = true;
}

void RuntimeOption::UseKunlunXin(int kunlunxin_id, int l3_workspace_size,
Expand All @@ -95,21 +95,21 @@ void RuntimeOption::UseKunlunXin(int kunlunxin_id, int l3_workspace_size,
const std::string& precision,
bool adaptive_seqlen,
bool enable_multi_stream) {
enable_kunlunxin = true;
device_id = kunlunxin_id;
kunlunxin_l3_workspace_size = l3_workspace_size;
kunlunxin_locked = locked;
kunlunxin_autotune = autotune;
kunlunxin_autotune_file = autotune_file;
kunlunxin_precision = precision;
kunlunxin_adaptive_seqlen = adaptive_seqlen;
kunlunxin_enable_multi_stream = enable_multi_stream;
device = Device::KUNLUNXIN;
paddle_lite_option.enable_kunlunxin = true;
paddle_lite_option.device_id = kunlunxin_id;
paddle_lite_option.kunlunxin_l3_workspace_size = l3_workspace_size;
paddle_lite_option.kunlunxin_locked = locked;
paddle_lite_option.kunlunxin_autotune = autotune;
paddle_lite_option.kunlunxin_autotune_file = autotune_file;
paddle_lite_option.kunlunxin_precision = precision;
paddle_lite_option.kunlunxin_adaptive_seqlen = adaptive_seqlen;
paddle_lite_option.kunlunxin_enable_multi_stream = enable_multi_stream;
}

// Selects Ascend as the target: sets `device` to Device::ASCEND and raises the
// enable_ascend flag on the Paddle Lite option struct.
// NOTE(review): the unqualified `enable_ascend = true;` and the
// `paddle_lite_option.enable_ascend = true;` line both appear in this text;
// presumably one is the pre-refactor form — confirm which the real file keeps.
void RuntimeOption::UseAscend() {
enable_ascend = true;
device = Device::ASCEND;
paddle_lite_option.enable_ascend = true;
}

void RuntimeOption::UseSophgo() {
Expand All @@ -124,6 +124,7 @@ void RuntimeOption::SetExternalStream(void* external_stream) {
// Records the requested CPU thread count.
// `thread_num` must be strictly positive; this is checked with FDASSERT
// (the macro's failure behavior is defined outside this view).
void RuntimeOption::SetCpuThreadNum(int thread_num) {
FDASSERT(thread_num > 0, "The thread_num must be greater than 0.");
cpu_thread_num = thread_num;
// Mirror the value into the Paddle Lite option so both fields hold the
// same thread count.
paddle_lite_option.threads = thread_num;
}

void RuntimeOption::SetOrtGraphOptLevel(int level) {
Expand Down Expand Up @@ -231,57 +232,65 @@ void RuntimeOption::SetOpenVINODevice(const std::string& name) {
openvino_device = name;
}

// Paddle Lite FP16 / INT8 toggles.
// NOTE(review): every toggle in this run is defined twice — once writing a
// `lite_enable_*` name and once writing the matching `paddle_lite_option`
// field. This looks like both sides of a flattened diff; confirm which
// definition the real file keeps (two definitions of one member cannot
// coexist in a single translation unit).
// Writes true to lite_enable_fp16.
void RuntimeOption::EnableLiteFP16() { lite_enable_fp16 = true; }
// Writes true to paddle_lite_option.enable_fp16.
void RuntimeOption::EnableLiteFP16() { paddle_lite_option.enable_fp16 = true; }

// Writes false to lite_enable_fp16.
void RuntimeOption::DisableLiteFP16() { lite_enable_fp16 = false; }
// Writes true to lite_enable_int8.
void RuntimeOption::EnableLiteInt8() { lite_enable_int8 = true; }
// Writes false to paddle_lite_option.enable_fp16.
void RuntimeOption::DisableLiteFP16() {
paddle_lite_option.enable_fp16 = false;
}

// Writes true to paddle_lite_option.enable_int8.
void RuntimeOption::EnableLiteInt8() { paddle_lite_option.enable_int8 = true; }

// Writes false to paddle_lite_option.enable_int8.
void RuntimeOption::DisableLiteInt8() {
paddle_lite_option.enable_int8 = false;
}

// Writes false to lite_enable_int8.
void RuntimeOption::DisableLiteInt8() { lite_enable_int8 = false; }
// Stores the requested Paddle Lite power mode.
// NOTE(review): `mode` is written to both `lite_power_mode` and
// `paddle_lite_option.power_mode` in this text; presumably the first line is
// the pre-refactor form — confirm against the real file.
void RuntimeOption::SetLitePowerMode(LitePowerMode mode) {
lite_power_mode = mode;
paddle_lite_option.power_mode = mode;
}

// Stores the directory string used for Paddle Lite's optimized model.
// NOTE(review): the value is written to both `lite_optimized_model_dir` and
// `paddle_lite_option.optimized_model_dir` in this text; presumably the first
// line is the pre-refactor form — confirm against the real file.
void RuntimeOption::SetLiteOptimizedModelDir(
const std::string& optimized_model_dir) {
lite_optimized_model_dir = optimized_model_dir;
paddle_lite_option.optimized_model_dir = optimized_model_dir;
}

// Stores the NNAdapter subgraph-partition config path.
// NOTE(review): as written, the body is one chained assignment
// (`a = b = value;`) that writes the argument to
// `paddle_lite_option.nnadapter_subgraph_partition_config_path` and then to
// `lite_nnadapter_subgraph_partition_config_path`. The two left-hand sides
// look like the before/after lines of a flattened diff — confirm which one
// the real file keeps.
void RuntimeOption::SetLiteSubgraphPartitionPath(
const std::string& nnadapter_subgraph_partition_config_path) {
lite_nnadapter_subgraph_partition_config_path =
paddle_lite_option.nnadapter_subgraph_partition_config_path =
nnadapter_subgraph_partition_config_path;
}

// Stores the NNAdapter subgraph-partition config, passed as a string buffer.
// NOTE(review): as written, the body is one chained assignment
// (`a = b = value;`) that writes the argument to
// `paddle_lite_option.nnadapter_subgraph_partition_config_buffer` and then to
// `lite_nnadapter_subgraph_partition_config_buffer`. The two left-hand sides
// look like the before/after lines of a flattened diff — confirm which one
// the real file keeps.
void RuntimeOption::SetLiteSubgraphPartitionConfigBuffer(
const std::string& nnadapter_subgraph_partition_config_buffer) {
lite_nnadapter_subgraph_partition_config_buffer =
paddle_lite_option.nnadapter_subgraph_partition_config_buffer =
nnadapter_subgraph_partition_config_buffer;
}

// Stores a copy of the list of NNAdapter device names.
// NOTE(review): the list is copied into both `lite_nnadapter_device_names`
// and `paddle_lite_option.nnadapter_device_names` in this text; presumably
// the first line is the pre-refactor form — confirm against the real file.
void RuntimeOption::SetLiteDeviceNames(
const std::vector<std::string>& nnadapter_device_names) {
lite_nnadapter_device_names = nnadapter_device_names;
paddle_lite_option.nnadapter_device_names = nnadapter_device_names;
}

// Stores the NNAdapter context-properties string.
// NOTE(review): the value is written to both
// `lite_nnadapter_context_properties` and
// `paddle_lite_option.nnadapter_context_properties` in this text; presumably
// the first line is the pre-refactor form — confirm against the real file.
void RuntimeOption::SetLiteContextProperties(
const std::string& nnadapter_context_properties) {
lite_nnadapter_context_properties = nnadapter_context_properties;
paddle_lite_option.nnadapter_context_properties =
nnadapter_context_properties;
}

// Stores the NNAdapter model cache directory string.
// NOTE(review): the value is written to both `lite_nnadapter_model_cache_dir`
// and `paddle_lite_option.nnadapter_model_cache_dir` in this text; presumably
// the first line is the pre-refactor form — confirm against the real file.
void RuntimeOption::SetLiteModelCacheDir(
const std::string& nnadapter_model_cache_dir) {
lite_nnadapter_model_cache_dir = nnadapter_model_cache_dir;
paddle_lite_option.nnadapter_model_cache_dir = nnadapter_model_cache_dir;
}

// Stores a copy of the NNAdapter dynamic-shape table: a map from a string key
// to a list of int64 vectors.
// NOTE(review): the key is presumably an input tensor name and each inner
// vector one candidate shape — confirm against the Lite backend.
// NOTE(review): the map is copied into both
// `lite_nnadapter_dynamic_shape_info` and
// `paddle_lite_option.nnadapter_dynamic_shape_info` in this text; presumably
// the first line is the pre-refactor form — confirm against the real file.
void RuntimeOption::SetLiteDynamicShapeInfo(
const std::map<std::string, std::vector<std::vector<int64_t>>>&
nnadapter_dynamic_shape_info) {
lite_nnadapter_dynamic_shape_info = nnadapter_dynamic_shape_info;
paddle_lite_option.nnadapter_dynamic_shape_info =
nnadapter_dynamic_shape_info;
}

// Stores the NNAdapter mixed-precision quantization config path.
// NOTE(review): as written, the body is one chained assignment
// (`a = b = value;`) that writes the argument to
// `paddle_lite_option.nnadapter_mixed_precision_quantization_config_path` and
// then to `lite_nnadapter_mixed_precision_quantization_config_path`. The two
// left-hand sides look like the before/after lines of a flattened diff —
// confirm which one the real file keeps.
void RuntimeOption::SetLiteMixedPrecisionQuantizationConfigPath(
const std::string& nnadapter_mixed_precision_quantization_config_path) {
lite_nnadapter_mixed_precision_quantization_config_path =
paddle_lite_option.nnadapter_mixed_precision_quantization_config_path =
nnadapter_mixed_precision_quantization_config_path;
}

Expand Down
39 changes: 5 additions & 34 deletions fastdeploy/runtime/runtime_option.h
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,8 @@ struct FASTDEPLOY_DECL RuntimeOption {
bool enable_half_partial = false);

Backend backend = Backend::UNKNOWN;
// for cpu inference and preprocess

// for cpu inference
// default will let the backend choose their own default value
int cpu_thread_num = -1;
int device_id = 0;
Expand Down Expand Up @@ -388,31 +389,6 @@ struct FASTDEPLOY_DECL RuntimeOption {
float ipu_available_memory_proportion = 1.0;
bool ipu_enable_half_partial = false;

// ======Only for Paddle Lite Backend=====
// 0: LITE_POWER_HIGH 1: LITE_POWER_LOW 2: LITE_POWER_FULL
// 3: LITE_POWER_NO_BIND 4: LITE_POWER_RAND_HIGH
// 5: LITE_POWER_RAND_LOW
LitePowerMode lite_power_mode = LitePowerMode::LITE_POWER_NO_BIND;
// enable int8 or not
bool lite_enable_int8 = false;
// enable fp16 or not
bool lite_enable_fp16 = false;
// optimized model dir for CxxConfig
std::string lite_optimized_model_dir = "";
std::string lite_nnadapter_subgraph_partition_config_path = "";
// and other nnadapter settings for CxxConfig
std::string lite_nnadapter_subgraph_partition_config_buffer = "";
std::string lite_nnadapter_context_properties = "";
std::string lite_nnadapter_model_cache_dir = "";
std::string lite_nnadapter_mixed_precision_quantization_config_path = "";
std::map<std::string, std::vector<std::vector<int64_t>>>
lite_nnadapter_dynamic_shape_info = {{"", {{0}}}};
std::vector<std::string> lite_nnadapter_device_names = {};

bool enable_timvx = false;
bool enable_ascend = false;
bool enable_kunlunxin = false;

// ======Only for Trt Backend=======
std::map<std::string, std::vector<int32_t>> trt_max_shape;
std::map<std::string, std::vector<int32_t>> trt_min_shape;
Expand Down Expand Up @@ -444,14 +420,9 @@ struct FASTDEPLOY_DECL RuntimeOption {
fastdeploy::rknpu2::CoreMask rknpu2_core_mask_ =
fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO;

// ======Only for KunlunXin XPU Backend=======
int kunlunxin_l3_workspace_size = 0xfffc00;
bool kunlunxin_locked = false;
bool kunlunxin_autotune = true;
std::string kunlunxin_autotune_file = "";
std::string kunlunxin_precision = "int16";
bool kunlunxin_adaptive_seqlen = false;
bool kunlunxin_enable_multi_stream = false;

/// Option to configure Paddle Lite backend
LiteBackendOption paddle_lite_option;

std::string model_file = ""; // Path of model file
std::string params_file = ""; // Path of parameters file, can be empty
Expand Down