2 changes: 1 addition & 1 deletion paddle/fluid/inference/lite/CMakeLists.txt
@@ -2,5 +2,5 @@ cc_binary(test_leaky_relu SRCS test_leaky_relu.cc DEPS lite_full_static dynload_
cc_library(lite_op_teller SRCS op_teller.cc DEPS framework_proto device_context boost xxhash)
cc_library(lite_engine SRCS engine.cc DEPS lite_full_static framework_proto)
cc_library(lite_tensor_utils SRCS tensor_utils.cc DEPS memcpy lite_full_static framework_proto boost)
cc_test(test_lite_engine SRCS test_engine.cc DEPS lite_engine protobuf)
cc_test(test_lite_engine SRCS test_engine.cc DEPS lite_engine protobuf framework_proto glog gtest analysis)
cc_test(test_lite_predictor SRCS test_predictor.cc DEPS lite_engine paddle_fluid)
18 changes: 7 additions & 11 deletions paddle/fluid/inference/lite/engine.cc
@@ -15,20 +15,16 @@
#define LITE_WITH_CUDA 1

#include "paddle/fluid/inference/lite/engine.h"
#include "lite/core/context.h"
#include "lite/core/device_info.h"

namespace paddle {
namespace inference {
namespace lite {

bool EngineManager::Empty() const {
return engines_.size() == 0;
}
bool EngineManager::Empty() const { return engines_.size() == 0; }

bool EngineManager::Has(const std::string& name) const {
if (engines_.count(name) == 0) {
return false;
return false;
}
return engines_.at(name).get() != nullptr;
}
@@ -37,12 +33,12 @@ paddle::lite::Predictor* EngineManager::Get(const std::string& name) const {
return engines_.at(name).get();
}

paddle::lite::Predictor* EngineManager::Create(
const std::string& name, const EngineConfig& cfg) {
paddle::lite::Env<TARGET(kCUDA)>::Init();
paddle::lite::Predictor* EngineManager::Create(const std::string& name,
const EngineConfig& cfg) {
auto* p = new paddle::lite::Predictor();
p->Build("", cfg.model, cfg.param, cfg.prefer_place, cfg.valid_places, cfg.neglected_passes,
cfg.model_type, cfg.memory_from_memory);
paddle::lite::Env<TARGET(kCUDA)>::Init();
p->Build("", cfg.model, cfg.param, cfg.prefer_place, cfg.valid_places,
cfg.neglected_passes, cfg.model_type, cfg.model_from_memory);
engines_[name].reset(p);
return p;
}
11 changes: 8 additions & 3 deletions paddle/fluid/inference/lite/engine.h
@@ -15,8 +15,10 @@
#pragma once

#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include "lite/api/cxx_api.h"

@@ -31,18 +33,21 @@ struct EngineConfig {
std::vector<paddle::lite::Place> valid_places;
std::vector<std::string> neglected_passes;
lite_api::LiteModelType model_type{lite_api::LiteModelType::kProtobuf};
bool memory_from_memory{true};
bool model_from_memory{true};
};

class EngineManager {
public:
bool Empty() const;
bool Has(const std::string& name) const;
paddle::lite::Predictor* Get(const std::string& name) const;
paddle::lite::Predictor* Create(const std::string& name, const EngineConfig& cfg);
paddle::lite::Predictor* Create(const std::string& name,
const EngineConfig& cfg);
void DeleteAll();

private:
std::unordered_map<std::string, std::unique_ptr<paddle::lite::Predictor>> engines_;
std::unordered_map<std::string, std::unique_ptr<paddle::lite::Predictor>>
engines_;
};

} // namespace lite
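Taken together, engine.h and engine.cc define a process-wide registry of lite predictors keyed by string. Below is a minimal sketch of the intended call pattern, assembled from the declarations above and the test added later in this PR; the buffer arguments are assumed to already hold a serialized model and params (EngineConfig::model_from_memory defaults to true), and the function name is hypothetical:

```cpp
#include <string>

#include "glog/logging.h"
#include "paddle/fluid/inference/lite/engine.h"
#include "paddle/fluid/inference/utils/singleton.h"

// Sketch only: build one engine, look it up by key, then tear everything down.
void BuildAndQueryEngine(const std::string& model_buf,
                         const std::string& param_buf) {
  paddle::inference::lite::EngineConfig config;
  config.model = model_buf;
  config.param = param_buf;
  config.prefer_place = {TARGET(kX86), PRECISION(kFloat)};
  config.valid_places = {
      paddle::lite::Place({TARGET(kHost), PRECISION(kFloat)}),
      paddle::lite::Place({TARGET(kX86), PRECISION(kFloat)}),
  };

  auto& manager =
      paddle::inference::Singleton<paddle::inference::lite::EngineManager>::Global();
  paddle::lite::Predictor* engine = manager.Create("engine_0", config);
  CHECK(manager.Has("engine_0"));             // registered under the key
  CHECK_EQ(manager.Get("engine_0"), engine);  // Get returns the same instance
  manager.DeleteAll();                        // drops every owned predictor
}
```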
55 changes: 40 additions & 15 deletions paddle/fluid/inference/lite/tensor_utils.cc
@@ -12,11 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include <map>
#include "paddle/fluid/inference/lite/engine.h"
#include "paddle/fluid/inference/lite/tensor_utils.h"
#include <map>
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/lite/engine.h"

namespace paddle {
namespace inference {
@@ -40,7 +40,20 @@ platform::Place GetNativePlace(const TargetType& type) {
}
}

framework::proto::VarType::Type GetNativePrecisionType(const PrecisionType& type) {
PrecisionType GetLitePrecisionType(framework::proto::VarType::Type type) {
switch (type) {
case framework::proto::VarType_Type_FP32:
return PrecisionType::kFloat;
case framework::proto::VarType_Type_INT8:
return PrecisionType::kInt8;
default:
LOG(FATAL) << "Error precision type.";
return PrecisionType::kUnk;
}
}

framework::proto::VarType::Type GetNativePrecisionType(
const PrecisionType& type) {
switch (type) {
case PrecisionType::kFloat:
return framework::proto::VarType_Type_FP32;
@@ -63,22 +76,27 @@ framework::DataLayout GetNativeLayoutType(const DataLayoutType& type) {
}

void MemoryCopy(const platform::Place& dst_place, void* dst_data,
const platform::Place& src_place, const void* src_data, const size_t size) {
const platform::Place& src_place, const void* src_data,
const size_t size) {
const platform::CPUPlace cpu_place;
const platform::CUDAPlace gpu_place;
if (platform::is_cpu_place(dst_place) && platform::is_cpu_place(src_place)) {
memory::Copy(cpu_place, dst_data, cpu_place, src_data, size);
} else {
#ifdef PADDLE_WITH_CUDA
// get device context from pool
platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
auto &ctx = *pool.Get(platform::CUDAPlace());
auto stream = reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
if (platform::is_cpu_place(dst_place) && platform::is_gpu_place(src_place)) {
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
auto& ctx = *pool.Get(platform::CUDAPlace());
auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
if (platform::is_cpu_place(dst_place) &&
platform::is_gpu_place(src_place)) {
memory::Copy(cpu_place, dst_data, gpu_place, src_data, size, stream);
} else if (platform::is_gpu_place(dst_place) && platform::is_cpu_place(src_place)) {
} else if (platform::is_gpu_place(dst_place) &&
platform::is_cpu_place(src_place)) {
memory::Copy(gpu_place, dst_data, cpu_place, src_data, size, stream);
} else if (platform::is_gpu_place(dst_place) && platform::is_gpu_place(src_place)) {
} else if (platform::is_gpu_place(dst_place) &&
platform::is_gpu_place(src_place)) {
memory::Copy(gpu_place, dst_data, gpu_place, src_data, size, stream);
}
#else
@@ -87,9 +105,14 @@ void MemoryCopy(const platform::Place& dst_place, void* dst_data,
}
}

} // namespace
} // namespace

void InitLiteTensorType(paddle::lite::Tensor* lite,
const framework::LoDTensor& fluid) {
lite->set_precision(GetLitePrecisionType(fluid.type()));
}

template<>
template <>
void TensorCopy(paddle::lite::Tensor* dst, const framework::LoDTensor& src) {
const platform::Place& src_place = src.place();
const platform::Place& dst_place = GetNativePlace(dst->target());
@@ -98,10 +121,11 @@ void TensorCopy(paddle::lite::Tensor* dst, const framework::LoDTensor& src) {
dst->Resize(framework::vectorize(src.dims()));
const void* src_data = src.data<void>();
void* dst_data = dst->mutable_data(size);
MemoryCopy(dst_place, dst_data, src_place, src_data, size);
MemoryCopy(dst_place, dst_data, src_place, src_data,
size * framework::SizeOfType(src.type()));
}

template<>
template <>
void TensorCopy(framework::LoDTensor* dst, const paddle::lite::Tensor& src) {
const platform::Place& src_place = GetNativePlace(src.target());
const platform::Place& dst_place = dst->place();
@@ -110,7 +134,8 @@ void TensorCopy(framework::LoDTensor* dst, const paddle::lite::Tensor& src) {
const size_t size = static_cast<size_t>(src.numel());
const void* src_data = src.raw_data();
void* dst_data = dst->mutable_data(dst_place, dst->type());
MemoryCopy(dst_place, dst_data, src_place, src_data, size);
MemoryCopy(dst_place, dst_data, src_place, src_data,
size * framework::SizeOfType(dst->type()));
}

} // namespace lite
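The key fix in tensor_utils.cc is the copy size: `src.numel()` counts elements, not bytes, so the old `MemoryCopy(..., size)` call moved only a quarter of a float32 tensor's data. The new code scales the count by `framework::SizeOfType(...)`. A self-contained illustration of the same arithmetic, with no Paddle dependencies:

```cpp
#include <cassert>
#include <cstring>
#include <vector>

int main() {
  std::vector<float> src = {1.f, 2.f, 3.f, 4.f};
  std::vector<float> dst(src.size(), 0.f);
  const size_t numel = src.size();  // element count, like Tensor::numel()

  // Buggy version: treats the element count as a byte count and
  // copies only the first float (4 of the 16 bytes).
  std::memcpy(dst.data(), src.data(), numel);
  assert(dst[1] != src[1]);

  // Fixed version: bytes = numel * element width, which is what
  // size * framework::SizeOfType(type) computes in the diff.
  std::memcpy(dst.data(), src.data(), numel * sizeof(float));
  assert(dst == src);
  return 0;
}
```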
5 changes: 4 additions & 1 deletion paddle/fluid/inference/lite/tensor_utils.h
@@ -14,9 +14,9 @@

#pragma once

#include "paddle/fluid/framework/tensor.h"
#include "lite/api/paddle_place.h"
#include "lite/core/tensor.h"
#include "paddle/fluid/framework/tensor.h"

namespace paddle {
namespace inference {
@@ -25,6 +25,9 @@ namespace lite {
template <typename DstTensor, typename SrcTensor>
void TensorCopy(DstTensor* dst, const SrcTensor& src);

void InitLiteTensorType(paddle::lite::Tensor* lite,
const framework::LoDTensor& fluid);

} // namespace lite
} // namespace inference
} // namespace paddle
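The two helpers are meant to be used together, as the lite engine op below does: seed the lite tensor's precision from the fluid tensor, then copy. A round-trip sketch under that assumption (function name hypothetical; each copy's destination is decided by the destination tensor's own target/place):

```cpp
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/lite/tensor_utils.h"

// Sketch only: fluid -> lite -> fluid. Assumes fluid_out already has its
// type and place set, since TensorCopy calls dst->mutable_data with them.
void RoundTrip(const paddle::framework::LoDTensor& fluid_in,
               paddle::lite::Tensor* lite_buf,
               paddle::framework::LoDTensor* fluid_out) {
  namespace plite = paddle::inference::lite;
  // Seed the precision first so lite kernels and the copy-back
  // see the right element type.
  plite::InitLiteTensorType(lite_buf, fluid_in);
  plite::TensorCopy(lite_buf, fluid_in);    // fluid -> lite
  plite::TensorCopy(fluid_out, *lite_buf);  // lite -> fluid
}
```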
92 changes: 68 additions & 24 deletions paddle/fluid/inference/lite/test_engine.cc
@@ -12,9 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include <ios>
#include <fstream>
#include <gtest/gtest.h>
#include <fstream>
#include <ios>

#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
@@ -23,44 +23,88 @@
#include "paddle/fluid/inference/lite/engine.h"
#include "paddle/fluid/inference/utils/singleton.h"

#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"

namespace paddle {
namespace lite {

namespace {

std::string read_file(const std::string &file) {
std::ifstream ifs(file.c_str(), std::ios::in | std::ios::binary | std::ios::ate);
std::ifstream::pos_type file_size = ifs.tellg();
ifs.seekg(0, std::ios::beg);
std::vector<char> bytes(file_size);
ifs.read(bytes.data(), file_size);
return std::string(bytes.data(), file_size);
void AddTensorToBlockDesc(framework::proto::BlockDesc* block,
const std::string& name,
const std::vector<int64_t>& shape) {
using framework::proto::VarType;
auto* var = block->add_vars();
framework::VarDesc desc(name);
desc.SetType(VarType::LOD_TENSOR);
desc.SetDataType(VarType::FP32);
desc.SetShape(shape);
*var = *desc.Proto();
}

} // namespace
void make_fake_model(std::string* model, std::string* param) {
framework::ProgramDesc program;
auto* block_ = program.Proto()->mutable_blocks(0);
LOG(INFO) << "create block desc";
framework::BlockDesc block_desc(&program, block_);
LOG(INFO) << "create feed op";
auto* feed0 = block_desc.AppendOp();
feed0->SetType("feed");
feed0->SetInput("X", {"feed"});
feed0->SetOutput("Out", {"x"});
feed0->SetAttr("col", 1);
AddTensorToBlockDesc(block_, "x", std::vector<int64_t>({2, 4, 1, 1}));
*block_->add_ops() = *feed0->Proto();
ASSERT_EQ(block_->ops_size(), 1);
framework::Scope scope;
platform::CPUPlace place;
platform::CPUDeviceContext ctx(place);
*model = program.Proto()->SerializeAsString();
}

} // namespace

TEST(EngineManager, Create) {
const std::string unique_key("engine_0");
const std::string model_dir = "/shixiaowei02/models/tmp/__model__";
TEST(EngineManager, manual) {
ASSERT_EQ(
inference::Singleton<inference::lite::EngineManager>::Global().Empty(),
true);

inference::lite::EngineConfig config;
config.model = read_file(model_dir);
config.param = "";
config.prefer_place = {TARGET(kCUDA), PRECISION(kFloat)};
make_fake_model(&(config.model), &(config.param));

const std::string unique_key("engine_0");
config.model_from_memory = true;
config.prefer_place = {TARGET(kX86), PRECISION(kFloat)};
config.valid_places = {
paddle::lite::Place({TARGET(kHost), PRECISION(kFloat)}),
paddle::lite::Place({TARGET(kX86), PRECISION(kFloat)}),
paddle::lite::Place({TARGET(kHost), PRECISION(kAny)}),
#ifdef PADDLE_WITH_CUDA
paddle::lite::Place({TARGET(kCUDA), PRECISION(kFloat)}),
paddle::lite::Place({TARGET(kCUDA), PRECISION(kFloat)}),
#endif
};

inference::Singleton<inference::lite::EngineManager>::Global()
.Create(unique_key, config);
/*
paddle::lite::Predictor* engine = inference::Singleton<inference::lite::EngineManager>::Global()
.Get(Attr<std::string>(unique_key));
*/
LOG(INFO) << "Create EngineManager";
inference::Singleton<inference::lite::EngineManager>::Global().Create(
unique_key, config);
LOG(INFO) << "Create EngineManager done";
ASSERT_EQ(
inference::Singleton<inference::lite::EngineManager>::Global().Empty(),
false);
ASSERT_EQ(inference::Singleton<inference::lite::EngineManager>::Global().Has(
unique_key),
true);
paddle::lite::Predictor* engine_0 =
inference::Singleton<inference::lite::EngineManager>::Global().Get(
unique_key);

CHECK_NOTNULL(engine_0);
inference::Singleton<inference::lite::EngineManager>::Global().DeleteAll();
CHECK(inference::Singleton<inference::lite::EngineManager>::Global().Get(
unique_key) == nullptr)
<< "the engine_0 should be nullptr";
}

} // namespace lite
1 change: 1 addition & 0 deletions paddle/fluid/operators/lite/CMakeLists.txt
@@ -1 +1,2 @@
op_library(lite_engine_op DEPS lite_engine lite_tensor_utils)
cc_test(test_lite_engine_op SRCS lite_engine_op_test.cc DEPS lite_engine_op analysis)
28 changes: 18 additions & 10 deletions paddle/fluid/operators/lite/lite_engine_op.h
@@ -38,18 +38,19 @@ class LiteEngineOp : public framework::OperatorBase {
private:
std::vector<std::string> in_names_;
std::vector<std::string> out_names_;
paddle::lite::Predictor* engine_;
paddle::lite::Predictor *engine_;

public:
LiteEngineOp(const std::string &type,
const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
: framework::OperatorBase(type, inputs, outputs, attrs) {
in_names_ = Inputs("Xs");
out_names_ = Outputs("Ys");
engine_ = inference::Singleton<inference::lite::EngineManager>::Global()
.Get(Attr<std::string>("engine_key"));
engine_ =
inference::Singleton<inference::lite::EngineManager>::Global().Get(
Attr<std::string>("engine_key"));
}

protected:
@@ -61,15 +62,22 @@ class LiteEngineOp : public framework::OperatorBase {
void Execute(const framework::Scope &scope,
const platform::Place &dev_place) const {
for (size_t i = 0; i < in_names_.size(); i++) {
const framework::LoDTensor& src_t = inference::analysis::GetFromScope<framework::LoDTensor>(scope, in_names_[i]);
paddle::lite::Tensor* dst_t = engine_->GetInput(i);
const framework::LoDTensor &src_t =
inference::analysis::GetFromScope<framework::LoDTensor>(scope,
in_names_[i]);
paddle::lite::Tensor *dst_t = engine_->GetInput(i);
inference::lite::InitLiteTensorType(dst_t, src_t);
inference::lite::TensorCopy(dst_t, src_t);
}
engine_->Run();
cudaDeviceSynchronize();
for (size_t i = 0; i < out_names_.size(); i++) {
const paddle::lite::Tensor& src_t = *(engine_->GetOutput(i));
framework::LoDTensor* dst_t = &inference::analysis::GetFromScope<framework::LoDTensor>(scope, out_names_[i]);
const paddle::lite::Tensor &src_t = *(engine_->GetOutput(i));
framework::LoDTensor *dst_t =
&inference::analysis::GetFromScope<framework::LoDTensor>(
scope, out_names_[i]);
inference::lite::InitLiteTensorType(
&const_cast<paddle::lite::Tensor &>(src_t), *dst_t);
inference::lite::TensorCopy(dst_t, src_t);
}
}