diff --git a/paddle/fluid/inference/lite/CMakeLists.txt b/paddle/fluid/inference/lite/CMakeLists.txt index 23ce6ebc2c97db..f86ce73201a5ba 100644 --- a/paddle/fluid/inference/lite/CMakeLists.txt +++ b/paddle/fluid/inference/lite/CMakeLists.txt @@ -3,3 +3,4 @@ cc_library(lite_engine SRCS engine.cc DEPS lite_full_static framework_proto) cc_library(lite_tensor_utils SRCS tensor_utils.cc DEPS memcpy lite_full_static framework_proto boost) cc_test(test_lite_engine SRCS test_engine.cc DEPS lite_engine protobuf framework_proto glog gtest analysis) cc_test(test_lite_tensor_utils SRCS test_tensor_utils.cc DEPS lite_engine paddle_fluid lite_tensor_utils) +cc_test(test_lite_predictor SRCS test_predictor.cc DEPS lite_engine paddle_fluid) diff --git a/paddle/fluid/inference/lite/test_engine.cc b/paddle/fluid/inference/lite/test_engine.cc index 990903f0560600..9c36f5a5975251 100644 --- a/paddle/fluid/inference/lite/test_engine.cc +++ b/paddle/fluid/inference/lite/test_engine.cc @@ -13,77 +13,107 @@ // limitations under the License. #include -#include -#include #include "lite/api/paddle_use_kernels.h" #include "lite/api/paddle_use_ops.h" #include "lite/api/paddle_use_passes.h" -#include "paddle/fluid/inference/lite/engine.h" -#include "paddle/fluid/inference/utils/singleton.h" - #include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/inference/utils/singleton.h" + +#include "paddle/fluid/operators/lite/ut_helper.h" +// include engine.h must in the end +#include "paddle/fluid/inference/lite/engine.h" namespace paddle { +namespace inference { namespace lite { -namespace { +using inference::lite::AddTensorToBlockDesc; +using inference::lite::CreateTensor; +using inference::lite::serialize_params; -void AddTensorToBlockDesc(framework::proto::BlockDesc* block, - const std::string& name, - const std::vector& shape) { - using framework::proto::VarType; - auto* var = block->add_vars(); - framework::VarDesc desc(name); - desc.SetType(VarType::LOD_TENSOR); - desc.SetDataType(VarType::FP32); - desc.SetShape(shape); - *var = *desc.Proto(); -} +namespace { void make_fake_model(std::string* model, std::string* param) { framework::ProgramDesc program; + LOG(INFO) << "program.block size is " << program.Size(); auto* block_ = program.Proto()->mutable_blocks(0); LOG(INFO) << "create block desc"; framework::BlockDesc block_desc(&program, block_); - LOG(INFO) << "create feed op"; auto* feed0 = block_desc.AppendOp(); feed0->SetType("feed"); feed0->SetInput("X", {"feed"}); feed0->SetOutput("Out", {"x"}); - feed0->SetAttr("col", 1); - AddTensorToBlockDesc(block_, "x", std::vector({2, 4, 1, 1})); + feed0->SetAttr("col", 0); + auto* feed1 = block_desc.AppendOp(); + feed1->SetType("feed"); + feed1->SetInput("X", {"feed"}); + feed1->SetOutput("Out", {"y"}); + feed1->SetAttr("col", 1); + LOG(INFO) << "create elementwise_add op"; + auto* elt_add = block_desc.AppendOp(); + elt_add->SetType("elementwise_add"); + elt_add->SetInput("X", std::vector({"x"})); + elt_add->SetInput("Y", std::vector({"y"})); + elt_add->SetOutput("Out", std::vector({"z"})); + elt_add->SetAttr("axis", -1); + LOG(INFO) << "create fetch op"; + auto* fetch = block_desc.AppendOp(); + fetch->SetType("fetch"); + fetch->SetInput("X", std::vector({"z"})); + fetch->SetOutput("Out", std::vector({"out"})); + fetch->SetAttr("col", 0); + // Set inputs' variable shape in BlockDesc + AddTensorToBlockDesc(block_, "x", std::vector({2, 4}), true); + AddTensorToBlockDesc(block_, "y", std::vector({2, 4}), true); + AddTensorToBlockDesc(block_, "z", std::vector({2, 4}), false); + AddTensorToBlockDesc(block_, "out", std::vector({2, 4}), false); + *block_->add_ops() = *feed0->Proto(); - ASSERT_EQ(block_->ops_size(), 1); + *block_->add_ops() = *feed1->Proto(); + *block_->add_ops() = *elt_add->Proto(); + *block_->add_ops() = *fetch->Proto(); + framework::Scope scope; +#ifdef PADDLE_WITH_CUDA + platform::CUDAPlace place; + platform::CUDADeviceContext ctx(place); +#else platform::CPUPlace place; platform::CPUDeviceContext ctx(place); +#endif + // Prepare variables. + std::vector repetitive_params{"x", "y"}; + CreateTensor(&scope, "x", std::vector({2, 4})); + CreateTensor(&scope, "y", std::vector({2, 4})); + ASSERT_EQ(block_->ops_size(), 4); *model = program.Proto()->SerializeAsString(); + serialize_params(param, &scope, repetitive_params); } } // namespace -TEST(EngineManager, manual) { +TEST(EngineManager, engine) { ASSERT_EQ( inference::Singleton::Global().Empty(), true); inference::lite::EngineConfig config; make_fake_model(&(config.model), &(config.param)); + LOG(INFO) << "prepare config"; const std::string unique_key("engine_0"); config.model_from_memory = true; - config.prefer_place = {TARGET(kX86), PRECISION(kFloat)}; config.valid_places = { - paddle::lite::Place({TARGET(kX86), PRECISION(kFloat)}), - paddle::lite::Place({TARGET(kHost), PRECISION(kAny)}), #ifdef PADDLE_WITH_CUDA paddle::lite::Place({TARGET(kCUDA), PRECISION(kFloat)}), #endif + paddle::lite::Place({TARGET(kX86), PRECISION(kFloat)}), + paddle::lite::Place({TARGET(kHost), PRECISION(kAny)}), }; LOG(INFO) << "Create EngineManager"; @@ -108,4 +138,5 @@ TEST(EngineManager, manual) { } } // namespace lite +} // namespace inference } // namespace paddle diff --git a/paddle/fluid/inference/lite/test_predictor.cc b/paddle/fluid/inference/lite/test_predictor.cc new file mode 100644 index 00000000000000..3ca26a05e52117 --- /dev/null +++ b/paddle/fluid/inference/lite/test_predictor.cc @@ -0,0 +1,228 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include "lite/api/paddle_use_kernels.h" +#include "lite/api/paddle_use_ops.h" +#include "lite/api/paddle_use_passes.h" + +#include "paddle/fluid/framework/block_desc.h" +#include "paddle/fluid/framework/op_desc.h" +#include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/framework/scope.h" + +#include "paddle/fluid/inference/api/paddle_inference_api.h" +#include "paddle/fluid/operators/lite/ut_helper.h" +#include "paddle/fluid/platform/enforce.h" + +// int main() { +// LOG(INFO) << "leaky_relu"; +// paddle::AnalysisConfig config; +// // +// config.SetModel("/shixiaowei02/Paddle_lite/xingzhaolong/leaky_relu_model"); +// config.SetModel("/Paddle/models/lite/leaky_relu"); +// config.SwitchUseFeedFetchOps(false); +// config.EnableUseGpu(10, 0); +// config.EnableLiteEngine(paddle::AnalysisConfig::Precision::kFloat32); +// config.pass_builder()->TurnOnDebug(); +// +// auto predictor = CreatePaddlePredictor(config); +// PADDLE_ENFORCE_NOT_NULL(predictor.get()); +// +// const int batch_size = 1; +// const int channels = 1; +// const int height = 3; +// const int width = 3; +// float data[batch_size * channels * height * width] = {0.5, -0.5, 0, -0, 1, +// -1, 2, -2, 3}; +// +// auto input_names = predictor->GetInputNames(); +// auto input_t = predictor->GetInputTensor(input_names[0]); +// input_t->Reshape({batch_size, channels, height, width}); +// input_t->copy_from_cpu(data); +// +// CHECK(predictor->ZeroCopyRun()); +// +// std::vector out_data; +// auto output_names = predictor->GetOutputNames(); +// auto output_t = predictor->GetOutputTensor(output_names[0]); +// std::vector output_shape = output_t->shape(); +// int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1, +// std::multiplies()); +// LOG(INFO) << "out_num is " << out_num; +// out_data.resize(out_num); +// output_t->copy_to_cpu(out_data.data()); +// return 0; +//} + +// int main() { +// +///* +// // for yolov3 +// LOG(INFO) << "yolo_v3"; +// paddle::AnalysisConfig config; +// config.SetModel("/Paddle/models/lite/yolov3_infer/__model__", +// "/Paddle/models/lite/yolov3_infer/__params__"); +// config.SwitchUseFeedFetchOps(false); +// config.EnableUseGpu(10, 3); +// config.EnableLiteEngine(paddle::AnalysisConfig::Precision::kFloat32); +// config.pass_builder()->TurnOnDebug(); +// +// auto predictor = CreatePaddlePredictor(config); +// PADDLE_ENFORCE_NOT_NULL(predictor.get()); +// +// const int batch_size = 1; +// const int channels = 3; +// const int height = 608; +// const int width = 608; +// // float *data = new float[batch_size * channels * height * width]; +// float data[batch_size * channels * height * width]; +// memset(data, 0, sizeof(float) * batch_size * channels * height * width); +// +// auto input_names = predictor->GetInputNames(); +// LOG(INFO) << input_names[0]; +// LOG(INFO) << input_names[1]; +// auto input_image = predictor->GetInputTensor(input_names[0]); +// input_image->Reshape({batch_size, channels, height, width}); +// input_image->copy_from_cpu(data); +// +// int im_size_data[2] = {608, 608}; +// auto input_size = predictor->GetInputTensor(input_names[1]); +// input_size->Reshape({1, 2}); +// input_size->copy_from_cpu(im_size_data); +// +// CHECK(predictor->ZeroCopyRun()); +// +// std::vector out_data; +// auto output_names = predictor->GetOutputNames(); +// auto output_t = predictor->GetOutputTensor(output_names[0]); +// std::vector output_shape = output_t->shape(); +// int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1, +// std::multiplies()); +// LOG(INFO) << "out_num is " << out_num; +// out_data.resize(out_num); +// output_t->copy_to_cpu(out_data.data()); +// return 0; +//*/ +//} + +namespace paddle { +namespace lite { + +using inference::lite::AddTensorToBlockDesc; +using inference::lite::CreateTensor; +using inference::lite::serialize_params; + +namespace { + +void fake_mul(std::string* model, std::string* param) { + framework::ProgramDesc program; + auto* block_ = program.Proto()->mutable_blocks(0); + framework::BlockDesc block_desc(&program, block_); + + LOG(INFO) << "create feed op"; + auto* feed0 = block_desc.AppendOp(); + feed0->SetType("feed"); + feed0->SetInput("X", {"feed"}); + feed0->SetOutput("Out", {"x"}); + feed0->SetAttr("col", 0); + + LOG(INFO) << "create mul op"; + auto* mul = block_desc.AppendOp(); + mul->SetType("mul"); + mul->SetInput("X", std::vector({"x"})); + mul->SetInput("Y", std::vector({"weight"})); + mul->SetOutput("Out", std::vector({"y"})); + mul->SetAttr("y_num_col_dims", 1); + mul->SetAttr("x_num_col_dims", 1); + + LOG(INFO) << "create leaky_relu op"; + auto* leaky = block_desc.AppendOp(); + leaky->SetType("leaky_relu"); + leaky->SetInput("X", std::vector({"y"})); + leaky->SetOutput("Out", std::vector({"z"})); + leaky->SetAttr("alpha", 0.1f); + + LOG(INFO) << "create fetch op"; + auto* fetch = block_desc.AppendOp(); + fetch->SetType("fetch"); + fetch->SetInput("X", std::vector({"z"})); + fetch->SetOutput("Out", std::vector({"out"})); + fetch->SetAttr("col", 0); + + AddTensorToBlockDesc(block_, "x", std::vector({3, 4})); + AddTensorToBlockDesc(block_, "weight", std::vector({4, 2}), true); + AddTensorToBlockDesc(block_, "y", std::vector({3, 2})); + AddTensorToBlockDesc(block_, "z", std::vector({3, 2})); + AddTensorToBlockDesc(block_, "out", std::vector({3, 2})); + *block_->add_ops() = *feed0->Proto(); + *block_->add_ops() = *mul->Proto(); + *block_->add_ops() = *leaky->Proto(); + *block_->add_ops() = *fetch->Proto(); + *model = program.Proto()->SerializeAsString(); + + framework::Scope scope; + CreateTensor(&scope, "weight", std::vector({4, 2})); + serialize_params(param, &scope, {"weight"}); +} +} // namespace + +TEST(paddle_lite_subgraph, predictor) { + LOG(INFO) << "fake model"; +#ifdef PADDLE_WITH_CUDA + paddle::AnalysisConfig config; + std::string model, param = ""; + fake_mul(&model, ¶m); + config.SetModelBuffer(model.c_str(), model.size(), param.c_str(), + param.size()); + config.SwitchUseFeedFetchOps(false); + config.EnableUseGpu(10, 0); + config.EnableLiteEngine(paddle::AnalysisConfig::Precision::kFloat32); + config.pass_builder()->TurnOnDebug(); + + auto predictor = paddle::CreatePaddlePredictor(config); + PADDLE_ENFORCE_NOT_NULL(predictor.get()); + + const int height = 3; + const int width = 4; + float* data = new float[height * width]; + for (int i = 0; i < height * width; ++i) { + data[i] = i; + } + auto input_names = predictor->GetInputNames(); + auto input_t = predictor->GetInputTensor(input_names[0]); + input_t->Reshape({height, width}); + input_t->copy_from_cpu(data); + LOG(INFO) << "start to run!"; + CHECK(predictor->ZeroCopyRun()); + LOG(INFO) << "get output tensor!"; + std::vector out_data; + auto output_names = predictor->GetOutputNames(); + auto output_t = predictor->GetOutputTensor(output_names[0]); + std::vector output_shape = output_t->shape(); + int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1, + std::multiplies()); + LOG(INFO) << "out_num is " << out_num; + out_data.resize(out_num); + output_t->copy_to_cpu(out_data.data()); + for (size_t i = 0; i < out_data.size(); ++i) { + LOG(INFO) << out_data[i]; + } +#endif +} +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/operators/lite/CMakeLists.txt b/paddle/fluid/operators/lite/CMakeLists.txt index 5bb7892590848a..3ba9a7d95c268e 100644 --- a/paddle/fluid/operators/lite/CMakeLists.txt +++ b/paddle/fluid/operators/lite/CMakeLists.txt @@ -1,2 +1,3 @@ op_library(lite_engine_op DEPS lite_engine lite_tensor_utils) -cc_test(test_lite_engine_op SRCS lite_engine_op_test.cc DEPS lite_engine_op analysis) +#cc_test(test_lite_engine_op SRCS lite_engine_op_test.cc DEPS lite_engine_op analysis) +cc_test(test_lite_engine_op SRCS lite_engine_op_test.cc DEPS lite_engine_op paddle_fluid) diff --git a/paddle/fluid/operators/lite/lite_engine_op_test.cc b/paddle/fluid/operators/lite/lite_engine_op_test.cc index 91c4fec461cf87..b9d3fe734441af 100644 --- a/paddle/fluid/operators/lite/lite_engine_op_test.cc +++ b/paddle/fluid/operators/lite/lite_engine_op_test.cc @@ -18,53 +18,38 @@ #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/inference/utils/singleton.h" -#include "paddle/fluid/operators/lite/lite_engine_op.h" -#include "paddle/fluid/operators/lite/ut_helper.h" #include "lite/api/paddle_use_kernels.h" #include "lite/api/paddle_use_ops.h" #include "lite/api/paddle_use_passes.h" +// must be this order +#include "paddle/fluid/inference/utils/singleton.h" +#include "paddle/fluid/operators/lite/lite_engine_op.h" +#include "paddle/fluid/operators/lite/ut_helper.h" USE_NO_KERNEL_OP(lite_engine) -namespace paddle { -namespace operators { -namespace { -void CreateTensor(framework::Scope* scope, const std::string& name, - const std::vector& shape) { - auto* var = scope->Var(name); - auto* tensor = var->GetMutable(); - auto dims = framework::make_ddim(shape); - tensor->Resize(dims); -#ifdef PADDLE_WITH_CUDA - platform::CUDAPlace place; -#else - platform::CPUPlace place; -#endif - inference::lite::RandomizeTensor(tensor, place); -} +using paddle::inference::lite::AddTensorToBlockDesc; +using paddle::inference::lite::CreateTensor; +using paddle::inference::lite::serialize_params; -void AddTensorToBlockDesc(framework::proto::BlockDesc* block, - const std::string& name, - const std::vector& shape, bool persistable) { - using framework::proto::VarType; - auto* var = block->add_vars(); - framework::VarDesc desc(name); - desc.SetType(VarType::LOD_TENSOR); - desc.SetDataType(VarType::FP32); - desc.SetShape(shape); - desc.SetPersistable(persistable); - *var = *desc.Proto(); -} -} // namespace +namespace paddle { +namespace operators { -TEST(LiteEngineOp, manual) { +TEST(LiteEngineOp, engine_op) { framework::ProgramDesc program; auto* block_ = program.Proto()->mutable_blocks(0); - - LOG(INFO) << "create block desc"; framework::BlockDesc block_desc(&program, block_); + auto* feed0 = block_desc.AppendOp(); + feed0->SetType("feed"); + feed0->SetInput("X", {"feed"}); + feed0->SetOutput("Out", {"x"}); + feed0->SetAttr("col", 0); + auto* feed1 = block_desc.AppendOp(); + feed1->SetType("feed"); + feed1->SetInput("X", {"feed"}); + feed1->SetOutput("Out", {"y"}); + feed1->SetAttr("col", 1); LOG(INFO) << "create elementwise_add op"; auto* elt_add = block_desc.AppendOp(); elt_add->SetType("elementwise_add"); @@ -84,6 +69,8 @@ TEST(LiteEngineOp, manual) { AddTensorToBlockDesc(block_, "z", std::vector({2, 4}), false); AddTensorToBlockDesc(block_, "out", std::vector({2, 4}), false); + *block_->add_ops() = *feed1->Proto(); + *block_->add_ops() = *feed0->Proto(); *block_->add_ops() = *elt_add->Proto(); *block_->add_ops() = *fetch->Proto(); @@ -98,44 +85,18 @@ TEST(LiteEngineOp, manual) { // Prepare variables. CreateTensor(&scope, "x", std::vector({2, 4})); CreateTensor(&scope, "y", std::vector({2, 4})); - CreateTensor(&scope, "z", std::vector({2, 4})); CreateTensor(&scope, "out", std::vector({2, 4})); - ASSERT_EQ(block_->ops_size(), 2); + ASSERT_EQ(block_->ops_size(), 4); - auto serialize_params = [](std::string* str, framework::Scope* scope, - const std::vector& params) { - std::ostringstream os; -#ifdef PADDLE_WITH_CUDA - platform::CUDAPlace place; - platform::CUDADeviceContext ctx(place); -#else - platform::CPUDeviceContext ctx; -#endif - for (const auto& param : params) { - PADDLE_ENFORCE_NOT_NULL(scope->FindVar(param), - "Block should already have a '%s' variable", - param); - auto* tensor = scope->FindVar(param)->GetMutable(); - framework::SerializeToStream(os, *tensor, ctx); - } - *str = os.str(); - }; std::vector repetitive_params{"x", "y"}; inference::lite::EngineConfig config; - config.prefer_place = { -#ifdef PADDLE_WITH_CUDA - TARGET(kCUDA), PRECISION(kFloat), -#else - TARGET(kX86), PRECISION(kFloat) -#endif - }; config.valid_places = { - paddle::lite::Place({TARGET(kHost), PRECISION(kAny)}), - paddle::lite::Place({TARGET(kX86), PRECISION(kFloat)}), #ifdef PADDLE_WITH_CUDA paddle::lite::Place({TARGET(kCUDA), PRECISION(kFloat)}), #endif + paddle::lite::Place({TARGET(kHost), PRECISION(kAny)}), + paddle::lite::Place({TARGET(kX86), PRECISION(kFloat)}), }; serialize_params(&(config.param), &scope, repetitive_params); config.model = program.Proto()->SerializeAsString(); @@ -147,6 +108,8 @@ TEST(LiteEngineOp, manual) { engine_op_desc.SetOutput("Ys", std::vector({"out"})); std::string engine_key = "engine_0"; engine_op_desc.SetAttr("engine_key", engine_key); + engine_op_desc.SetAttr("enable_int8", false); + engine_op_desc.SetAttr("use_gpu", true); engine_op_desc.SetBlockAttr("sub_block", &block_desc); inference::Singleton::Global().Create( @@ -159,6 +122,7 @@ TEST(LiteEngineOp, manual) { // Execute them. LOG(INFO) << "engine_op run"; engine_op->Run(scope, place); + LOG(INFO) << "done"; } } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/lite/ut_helper.h b/paddle/fluid/operators/lite/ut_helper.h index cad8c411b82390..3ec842d13f1279 100644 --- a/paddle/fluid/operators/lite/ut_helper.h +++ b/paddle/fluid/operators/lite/ut_helper.h @@ -14,6 +14,8 @@ #pragma once #include +#include +#include #include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" @@ -24,6 +26,38 @@ namespace paddle { namespace inference { namespace lite { +void AddTensorToBlockDesc(framework::proto::BlockDesc* block, + const std::string& name, + const std::vector& shape, + bool persistable = false) { + using framework::proto::VarType; + auto* var = block->add_vars(); + framework::VarDesc desc(name); + desc.SetType(VarType::LOD_TENSOR); + desc.SetDataType(VarType::FP32); + desc.SetShape(shape); + desc.SetPersistable(persistable); + *var = *desc.Proto(); +} + +void serialize_params(std::string* str, framework::Scope* scope, + const std::vector& params) { + std::ostringstream os; +#ifdef PADDLE_WITH_CUDA + platform::CUDAPlace place; + platform::CUDADeviceContext ctx(place); +#else + platform::CPUDeviceContext ctx; +#endif + for (const auto& param : params) { + PADDLE_ENFORCE_NOT_NULL(scope->FindVar(param), + "Block should already have a '%s' variable", param); + auto* tensor = scope->FindVar(param)->GetMutable(); + framework::SerializeToStream(os, *tensor, ctx); + } + *str = os.str(); +} + /* * Get a random float value between [low, high] */ @@ -47,10 +81,26 @@ void RandomizeTensor(framework::LoDTensor* tensor, for (size_t i = 0; i < num_elements; i++) { *(temp_data + i) = random(0., 1.); + LOG(INFO) << "weights: " << *(temp_data + i); } TensorCopySync(temp_tensor, place, tensor); } + +void CreateTensor(framework::Scope* scope, const std::string& name, + const std::vector& shape) { + auto* var = scope->Var(name); + auto* tensor = var->GetMutable(); + auto dims = framework::make_ddim(shape); + tensor->Resize(dims); +#ifdef PADDLE_WITH_CUDA + platform::CUDAPlace place; +#else + platform::CPUPlace place; +#endif + RandomizeTensor(tensor, place); +} + } // namespace lite } // namespace inference } // namespace paddle