Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmake/external/lite.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
set(LITE_INSTALL_DIR ${THIRD_PARTY_PATH}/install/lite)

if(NOT LITE_GIT_TAG)
set(LITE_GIT_TAG dfdfa6440c83bf0b415f9f5a9ff84842ce0bb0fa)
set(LITE_GIT_TAG 6d2b2a4028a58715b01887b04eb9bff8432eb184)
endif()

if(NOT CUDA_ARCH_NAME)
Expand Down
4 changes: 4 additions & 0 deletions paddle/fluid/inference/analysis/argument.h
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,10 @@ struct Argument {

DECL_ARGUMENT_FIELD(fusion_statis, FusionStatis, fusion_statis_t);

// Only used in paddle-lite subgraph.
DECL_ARGUMENT_FIELD(cpu_math_library_num_threads, CpuMathLibraryNumThreads,
int);

private:
std::unordered_set<std::string> valid_fields_;
};
Expand Down
2 changes: 2 additions & 0 deletions paddle/fluid/inference/analysis/ir_pass_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,8 @@ void IRPassManager::CreatePasses(Argument *argument,
pass->Set("use_xpu", new bool(argument->use_xpu()));
pass->Set("xpu_l3_workspace_size",
new int(argument->xpu_l3_workspace_size()));
pass->Set("cpu_math_library_num_threads",
new int(argument->cpu_math_library_num_threads()));
}
disable_logs_ = argument->disable_logs();
if (pass_name == "fc_fuse_pass") {
Expand Down
10 changes: 6 additions & 4 deletions paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ void LiteSubgraphPass::SetUpEngine(
bool enable_int8 = Get<bool>("enable_int8");
bool use_xpu = Get<bool>("use_xpu");
int xpu_l3_workspace_size = Get<int>("xpu_l3_workspace_size");
int cpu_math_library_num_threads = Get<int>("cpu_math_library_num_threads");

lite_api::TargetType target_type;
if (use_gpu) {
Expand All @@ -263,11 +264,12 @@ void LiteSubgraphPass::SetUpEngine(
// Notice: The ordering here determines the device where the
// input tensor of the Lite engine is located, and then affects
// whether tensor sharing is feasible.
paddle::lite::Place({target_type, precision_type}),
paddle::lite::Place({target_type, PRECISION(kInt64)}),
paddle::lite::Place({target_type, PRECISION(kFloat)}),
paddle::lite::Place({TARGET(kHost), PRECISION(kFloat)}),
paddle::lite_api::Place({target_type, precision_type}),
paddle::lite_api::Place({target_type, PRECISION(kInt64)}),
paddle::lite_api::Place({target_type, PRECISION(kFloat)}),
paddle::lite_api::Place({TARGET(kHost), PRECISION(kFloat)}),
};
config.cpu_math_library_num_threads = cpu_math_library_num_threads;
config.xpu_l3_workspace_size = xpu_l3_workspace_size;
if (dump_model) {
lite::StrToBinaryFile("./model.bin", config.model);
Expand Down
2 changes: 2 additions & 0 deletions paddle/fluid/inference/api/analysis_predictor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,8 @@ void AnalysisPredictor::PrepareArgument() {
}

if (config_.lite_engine_enabled()) {
argument_.SetCpuMathLibraryNumThreads(
config_.cpu_math_library_num_threads());
argument_.SetLitePrecisionMode(config_.lite_precision_mode_);
argument_.SetLitePassesFilter(config_.lite_passes_filter_);
argument_.SetLiteOpsFilter(config_.lite_ops_filter_);
Expand Down
46 changes: 29 additions & 17 deletions paddle/fluid/inference/lite/engine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,12 @@
#define LITE_WITH_XPU 1
#endif

#ifndef PADDLE_WITH_ARM
#define LITE_WITH_X86 1
#endif

#include "paddle/fluid/inference/lite/engine.h"
#include "lite/api/paddle_use_passes.h"
#include <utility>

namespace paddle {
namespace inference {
Expand All @@ -36,32 +40,40 @@ bool EngineManager::Has(const std::string& name) const {
return engines_.at(name).get() != nullptr;
}

paddle::lite::Predictor* EngineManager::Get(const std::string& name) const {
paddle::lite_api::PaddlePredictor* EngineManager::Get(
const std::string& name) const {
return engines_.at(name).get();
}

paddle::lite::Predictor* EngineManager::Create(const std::string& name,
const EngineConfig& cfg) {
if (cfg.valid_places.front().target == TARGET(kCUDA)) {
#ifdef PADDLE_WITH_CUDA
paddle::lite::Env<TARGET(kCUDA)>::Init();
paddle::lite_api::PaddlePredictor* EngineManager::Create(
const std::string& name, const EngineConfig& cfg) {
// config info for predictor.
paddle::lite_api::CxxConfig lite_cxx_config;
lite_cxx_config.set_model_buffer(cfg.model.c_str(), cfg.model.size(),
cfg.param.c_str(), cfg.param.size());
lite_cxx_config.set_valid_places(cfg.valid_places);
#ifdef PADDLE_WITH_ARM
set_threads.set_threads(cfg.cpu_math_library_num_threads);
#else
lite_cxx_config.set_x86_math_library_num_threads(
cfg.cpu_math_library_num_threads);
#endif
} else if (cfg.valid_places.front().target == TARGET(kXPU)) {

#ifdef PADDLE_WITH_XPU
paddle::lite::TargetWrapper<TARGET(kXPU)>::workspace_l3_size_per_thread =
cfg.xpu_l3_workspace_size;
lite_cxx_config.set_xpu_workspace_l3_size_per_thread(
cfg.xpu_l3_workspace_size);
#endif
}
auto* p = new paddle::lite::Predictor();
p->Build("", cfg.model, cfg.param, cfg.valid_places, cfg.neglected_passes,
cfg.model_type, cfg.model_from_memory);
engines_[name].reset(p);
return p;

// create predictor
std::shared_ptr<paddle::lite_api::PaddlePredictor> p =
paddle::lite_api::CreatePaddlePredictor(lite_cxx_config);
engines_[name] = std::move(p);
return engines_[name].get();
}

void EngineManager::DeleteAll() {
for (auto& item : engines_) {
item.second.reset(nullptr);
item.second.reset();
}
}

Expand Down
27 changes: 16 additions & 11 deletions paddle/fluid/inference/lite/engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,9 @@
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wall"
#include "lite/api/cxx_api.h"
#include "lite/api/paddle_api.h"
#include "lite/api/paddle_place.h"
#include "lite/core/context.h"
#include "lite/core/device_info.h"
#include "lite/core/memory.h"
#include "lite/core/op_registry.h"
#include "lite/core/tensor.h"
#include "lite/api/paddle_use_passes.h"
#pragma GCC diagnostic pop

namespace paddle {
Expand All @@ -38,25 +35,33 @@ namespace lite {
struct EngineConfig {
std::string model;
std::string param;
paddle::lite::Place prefer_place;
std::vector<paddle::lite::Place> valid_places;
std::vector<paddle::lite_api::Place> valid_places;
std::vector<std::string> neglected_passes;
lite_api::LiteModelType model_type{lite_api::LiteModelType::kProtobuf};
bool model_from_memory{true};

// for xpu
size_t xpu_l3_workspace_size;

// for x86 or arm
int cpu_math_library_num_threads{1};

// for cuda
bool use_multi_stream{false};
};

class EngineManager {
public:
bool Empty() const;
bool Has(const std::string& name) const;
paddle::lite::Predictor* Get(const std::string& name) const;
paddle::lite::Predictor* Create(const std::string& name,
const EngineConfig& cfg);
paddle::lite_api::PaddlePredictor* Get(const std::string& name) const;
paddle::lite_api::PaddlePredictor* Create(const std::string& name,
const EngineConfig& cfg);
void DeleteAll();

private:
std::unordered_map<std::string, std::unique_ptr<paddle::lite::Predictor>>
std::unordered_map<std::string,
std::shared_ptr<paddle::lite_api::PaddlePredictor>>
engines_;
};

Expand Down
97 changes: 71 additions & 26 deletions paddle/fluid/inference/lite/tensor_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
// limitations under the License.

#include "paddle/fluid/inference/lite/tensor_utils.h"
#include <functional>
#include <map>
#include <memory>
#include "paddle/fluid/framework/data_type.h"
Expand Down Expand Up @@ -144,16 +145,55 @@ void MemoryCopyAsync(const platform::Place& dst_place, void* dst_data,
}
}

void InitDstTensor(paddle::lite::Tensor* dst, const framework::LoDTensor& src) {
// Returns an untyped pointer obtained from the Lite tensor's typed
// mutable_data<T>(target_type) accessor, where T is the element type that
// corresponds to `precision_type` (float, int8_t, int32_t or int64_t).
//
// src:            Lite tensor whose data pointer is requested.
// precision_type: selects which mutable_data<T> instantiation is called.
// target_type:    forwarded unchanged to mutable_data<T>.
//
// Any precision other than the four listed above raises an Unimplemented
// error through PADDLE_THROW.
void* GetLiteTensorDataPtr(paddle::lite_api::Tensor* src,
                           PrecisionType precision_type,
                           TargetType target_type) {
  switch (precision_type) {
    case PrecisionType::kFloat:
      return static_cast<void*>(src->mutable_data<float>(target_type));
    case PrecisionType::kInt8:
      return static_cast<void*>(src->mutable_data<int8_t>(target_type));
    case PrecisionType::kInt32:
      return static_cast<void*>(src->mutable_data<int32_t>(target_type));
    case PrecisionType::kInt64:
      return static_cast<void*>(src->mutable_data<int64_t>(target_type));
    default:
      PADDLE_THROW(platform::errors::Unimplemented(
          "Unsupported precision type. Now only supports FP32, INT8, INT32 and "
          "INT64."));
  }
  // Only reached if the error macro above returns control to the caller.
  return nullptr;
}

// Returns the number of elements described by the tensor's shape, i.e. the
// product of all of its dimensions. An empty shape yields 1.
//
// The accumulator is seeded with an int64_t value on purpose: std::accumulate
// carries the running result in the type of its initial value, so seeding it
// with a plain `1` (int) would narrow every intermediate product to int and
// silently truncate element counts that do not fit in 32 bits.
int64_t GetLiteTensorNumel(const paddle::lite_api::Tensor& tensor) {
  const auto shape = tensor.shape();
  return std::accumulate(shape.begin(), shape.end(), int64_t{1},
                         std::multiplies<int64_t>());
}

void InitDstTensor(paddle::lite_api::Tensor* dst,
const framework::LoDTensor& src) {
// Currently, Lite needs to explicitly specify the target type of
// the input tensor.
constexpr int empty_size = 0;
dst->mutable_data(GetLiteTargetType(src.place()), empty_size);
dst->set_precision(GetLitePrecisionType(src.type()));
SetLoD(dst->mutable_lod(), src.lod());
dst->Resize({empty_size});
GetLiteTensorDataPtr(dst, GetLitePrecisionType(src.type()),
GetLiteTargetType(src.place()));
dst->SetPrecision(GetLitePrecisionType(src.type()));
paddle::lite::LoD lite_lod;
SetLoD(&lite_lod, src.lod());
dst->SetLoD(lite_lod);
}

void InitDstTensor(framework::LoDTensor* dst, const paddle::lite::Tensor& src) {
void InitDstTensor(framework::LoDTensor* dst,
const paddle::lite_api::Tensor& src) {
constexpr framework::proto::VarType::Type dtype =
framework::proto::VarType_Type_FP32;
dst->mutable_data(inference::lite::utils::GetNativePlace(src.target()),
Expand All @@ -162,7 +202,8 @@ void InitDstTensor(framework::LoDTensor* dst, const paddle::lite::Tensor& src) {
}

template <>
void TensorCopyAsync(paddle::lite::Tensor* dst, const framework::LoDTensor& src,
void TensorCopyAsync(paddle::lite_api::Tensor* dst,
const framework::LoDTensor& src,
const platform::DeviceContext& ctx) {
InitDstTensor(dst, src);
const platform::Place& src_place = src.place();
Expand All @@ -171,52 +212,56 @@ void TensorCopyAsync(paddle::lite::Tensor* dst, const framework::LoDTensor& src,
static_cast<size_t>(src.numel()) * framework::SizeOfType(src.type());
dst->Resize(framework::vectorize(src.dims()));
const void* src_data = src.data<void>();
void* dst_data = dst->mutable_data(bytes);
void* dst_data{nullptr};
dst_data = GetLiteTensorDataPtr(dst, GetLitePrecisionType(src.type()),
GetLiteTargetType(src.place()));
VLOG(3) << "[CopyAsync fluid -> lite] Bytes = " << bytes << ", src = " << &src
<< ", dst = " << dst << ", src_type = " << src.type();
MemoryCopyAsync(dst_place, dst_data, src_place, src_data, bytes, ctx);
VLOG(3) << "[Lite memory size] Bytes = " << dst->memory_size();
VLOG(3) << "[Lite memory size] Bytes = " << bytes;
}

template <>
void TensorCopyAsync(framework::LoDTensor* dst, const paddle::lite::Tensor& src,
void TensorCopyAsync(framework::LoDTensor* dst,
const paddle::lite_api::Tensor& src,
const platform::DeviceContext& ctx) {
dst->Resize(paddle::framework::make_ddim(src.dims().Vectorize()));
dst->Resize(paddle::framework::make_ddim(src.shape()));
InitDstTensor(dst, src);
const platform::Place& src_place = GetNativePlace(src.target());
const platform::Place& dst_place = dst->place();
const size_t bytes =
static_cast<size_t>(src.numel()) * framework::SizeOfType(dst->type());
const void* src_data = src.raw_data();
int64_t src_numel = GetLiteTensorNumel(src);
const size_t bytes = src_numel * framework::SizeOfType(dst->type());
const void* src_data = src.data<void>();
// When Lite is ready, the source type needs to be modified here.
void* dst_data = dst->mutable_data(dst_place, dst->type());
VLOG(3) << "[CopyAsync lite -> fluid] Bytes = " << bytes << ", src = " << &src
<< ", dst = " << dst << ", src_type = " << dst->type();
MemoryCopyAsync(dst_place, dst_data, src_place, src_data, bytes, ctx);
VLOG(3) << "[Lite memory size] Bytes = " << src.memory_size();
VLOG(3) << "[Lite memory size] Bytes = " << bytes;
}

template <>
void TensorDataShare(paddle::lite::Tensor* dst, framework::LoDTensor* src) {
const size_t bytes =
static_cast<size_t>(src->numel()) * framework::SizeOfType(src->type());
auto buf = std::make_shared<paddle::lite::Buffer>(paddle::lite::Buffer(
src->data<void>(), GetLiteTargetType(src->place()), src->memory_size()));
void TensorDataShare(paddle::lite_api::Tensor* dst, framework::LoDTensor* src) {
dst->Resize(framework::vectorize(src->dims()));
dst->set_precision(GetLitePrecisionType(src->type()));
SetLoD(dst->mutable_lod(), src->lod());
dst->ResetBuffer(buf, bytes);
dst->ShareExternalMemory(src->data<void>(), src->memory_size(),
GetLiteTargetType(src->place()));
dst->SetPrecision(GetLitePrecisionType(src->type()));
paddle::lite::LoD lite_lod;
SetLoD(&lite_lod, src->lod());
dst->SetLoD(lite_lod);
}

template <>
void TensorDataShare(framework::LoDTensor* dst, paddle::lite::Tensor* src) {
void TensorDataShare(framework::LoDTensor* dst, paddle::lite_api::Tensor* src) {
constexpr framework::proto::VarType::Type dtype =
framework::proto::VarType_Type_FP32;
void* src_raw_data = src->raw_data();
void* src_raw_data =
GetLiteTensorDataPtr(src, GetLitePrecisionType(dtype), src->target());
size_t memory_size = GetLiteTensorNumel(*src) * sizeof(float);
std::shared_ptr<memory::allocation::Allocation> holder(
new memory::allocation::Allocation(src_raw_data, src->memory_size(),
new memory::allocation::Allocation(src_raw_data, memory_size,
GetNativePlace(src->target())));
dst->Resize(paddle::framework::make_ddim(src->dims().Vectorize()));
dst->Resize(paddle::framework::make_ddim(src->shape()));
SetLoD(dst->mutable_lod(), src->lod());
dst->ResetHolderWithType(holder, dtype);
}
Expand Down
8 changes: 4 additions & 4 deletions paddle/fluid/inference/lite/test_engine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -102,10 +102,10 @@ TEST(EngineManager, engine) {
config.model_from_memory = true;
config.valid_places = {
#ifdef PADDLE_WITH_CUDA
paddle::lite::Place({TARGET(kCUDA), PRECISION(kFloat)}),
paddle::lite_api::Place({TARGET(kCUDA), PRECISION(kFloat)}),
#endif
paddle::lite::Place({TARGET(kX86), PRECISION(kFloat)}),
paddle::lite::Place({TARGET(kHost), PRECISION(kAny)}),
paddle::lite_api::Place({TARGET(kX86), PRECISION(kFloat)}),
paddle::lite_api::Place({TARGET(kHost), PRECISION(kAny)}),
};

LOG(INFO) << "Create EngineManager";
Expand All @@ -118,7 +118,7 @@ TEST(EngineManager, engine) {
ASSERT_EQ(inference::Singleton<inference::lite::EngineManager>::Global().Has(
unique_key),
true);
paddle::lite::Predictor* engine_0 =
paddle::lite_api::PaddlePredictor* engine_0 =
inference::Singleton<inference::lite::EngineManager>::Global().Get(
unique_key);
CHECK_NOTNULL(engine_0);
Expand Down
Loading