diff --git a/paddle/fluid/inference/lite/CMakeLists.txt b/paddle/fluid/inference/lite/CMakeLists.txt
index 23ce6ebc2c97db..f86ce73201a5ba 100644
--- a/paddle/fluid/inference/lite/CMakeLists.txt
+++ b/paddle/fluid/inference/lite/CMakeLists.txt
@@ -3,3 +3,4 @@ cc_library(lite_engine SRCS engine.cc DEPS lite_full_static framework_proto)
 cc_library(lite_tensor_utils SRCS tensor_utils.cc DEPS memcpy lite_full_static framework_proto boost)
 cc_test(test_lite_engine SRCS test_engine.cc DEPS lite_engine protobuf framework_proto glog gtest analysis)
 cc_test(test_lite_tensor_utils SRCS test_tensor_utils.cc DEPS lite_engine paddle_fluid lite_tensor_utils)
+cc_test(test_lite_predictor SRCS test_predictor.cc DEPS lite_engine paddle_fluid)
diff --git a/paddle/fluid/inference/lite/test_engine.cc b/paddle/fluid/inference/lite/test_engine.cc
index 990903f0560600..9c36f5a5975251 100644
--- a/paddle/fluid/inference/lite/test_engine.cc
+++ b/paddle/fluid/inference/lite/test_engine.cc
@@ -13,77 +13,107 @@
 // limitations under the License.
 
 #include <gtest/gtest.h>
-#include <fstream>
-#include <ios>
 
 #include "lite/api/paddle_use_kernels.h"
 #include "lite/api/paddle_use_ops.h"
 #include "lite/api/paddle_use_passes.h"
 
-#include "paddle/fluid/inference/lite/engine.h"
-#include "paddle/fluid/inference/utils/singleton.h"
-
 #include "paddle/fluid/framework/block_desc.h"
 #include "paddle/fluid/framework/op_desc.h"
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/inference/utils/singleton.h"
+
+#include "paddle/fluid/operators/lite/ut_helper.h"
+// include engine.h must in the end
+#include "paddle/fluid/inference/lite/engine.h"
 
 namespace paddle {
+namespace inference {
 namespace lite {
 
-namespace {
+using inference::lite::AddTensorToBlockDesc;
+using inference::lite::CreateTensor;
+using inference::lite::serialize_params;
 
-void AddTensorToBlockDesc(framework::proto::BlockDesc* block,
-                          const std::string& name,
-                          const std::vector<int64_t>& shape) {
-  using framework::proto::VarType;
-  auto* var = block->add_vars();
-  framework::VarDesc desc(name);
-  desc.SetType(VarType::LOD_TENSOR);
-  desc.SetDataType(VarType::FP32);
-  desc.SetShape(shape);
-  *var = *desc.Proto();
-}
+namespace {
 
 void make_fake_model(std::string* model, std::string* param) {
   framework::ProgramDesc program;
+  LOG(INFO) << "program.block size is " << program.Size();
   auto* block_ = program.Proto()->mutable_blocks(0);
   LOG(INFO) << "create block desc";
   framework::BlockDesc block_desc(&program, block_);
-  LOG(INFO) << "create feed op";
   auto* feed0 = block_desc.AppendOp();
   feed0->SetType("feed");
   feed0->SetInput("X", {"feed"});
   feed0->SetOutput("Out", {"x"});
-  feed0->SetAttr("col", 1);
-  AddTensorToBlockDesc(block_, "x", std::vector<int64_t>({2, 4, 1, 1}));
+  feed0->SetAttr("col", 0);
+  auto* feed1 = block_desc.AppendOp();
+  feed1->SetType("feed");
+  feed1->SetInput("X", {"feed"});
+  feed1->SetOutput("Out", {"y"});
+  feed1->SetAttr("col", 1);
+  LOG(INFO) << "create elementwise_add op";
+  auto* elt_add = block_desc.AppendOp();
+  elt_add->SetType("elementwise_add");
+  elt_add->SetInput("X", std::vector<std::string>({"x"}));
+  elt_add->SetInput("Y", std::vector<std::string>({"y"}));
+  elt_add->SetOutput("Out", std::vector<std::string>({"z"}));
+  elt_add->SetAttr("axis", -1);
+  LOG(INFO) << "create fetch op";
+  auto* fetch = block_desc.AppendOp();
+  fetch->SetType("fetch");
+  fetch->SetInput("X", std::vector<std::string>({"z"}));
+  fetch->SetOutput("Out", std::vector<std::string>({"out"}));
+  fetch->SetAttr("col", 0);
+  // Set inputs' variable shape in BlockDesc
+  AddTensorToBlockDesc(block_, "x", std::vector<int64_t>({2, 4}), true);
+  AddTensorToBlockDesc(block_, "y", std::vector<int64_t>({2, 4}), true);
+  AddTensorToBlockDesc(block_, "z", std::vector<int64_t>({2, 4}), false);
+  AddTensorToBlockDesc(block_, "out", std::vector<int64_t>({2, 4}), false);
+
   *block_->add_ops() = *feed0->Proto();
-  ASSERT_EQ(block_->ops_size(), 1);
+  *block_->add_ops() = *feed1->Proto();
+  *block_->add_ops() = *elt_add->Proto();
+  *block_->add_ops() = *fetch->Proto();
+
   framework::Scope scope;
+#ifdef PADDLE_WITH_CUDA
+  platform::CUDAPlace place;
+  platform::CUDADeviceContext ctx(place);
+#else
   platform::CPUPlace place;
   platform::CPUDeviceContext ctx(place);
+#endif
+  // Prepare variables.
+  std::vector<std::string> repetitive_params{"x", "y"};
+  CreateTensor(&scope, "x", std::vector<int64_t>({2, 4}));
+  CreateTensor(&scope, "y", std::vector<int64_t>({2, 4}));
+  ASSERT_EQ(block_->ops_size(), 4);
   *model = program.Proto()->SerializeAsString();
+  serialize_params(param, &scope, repetitive_params);
 }
 
 }  // namespace
 
-TEST(EngineManager, manual) {
+TEST(EngineManager, engine) {
   ASSERT_EQ(
       inference::Singleton<inference::lite::EngineManager>::Global().Empty(),
       true);
 
   inference::lite::EngineConfig config;
   make_fake_model(&(config.model), &(config.param));
+  LOG(INFO) << "prepare config";
 
   const std::string unique_key("engine_0");
   config.model_from_memory = true;
-  config.prefer_place = {TARGET(kX86), PRECISION(kFloat)};
   config.valid_places = {
-      paddle::lite::Place({TARGET(kX86), PRECISION(kFloat)}),
-      paddle::lite::Place({TARGET(kHost), PRECISION(kAny)}),
 #ifdef PADDLE_WITH_CUDA
       paddle::lite::Place({TARGET(kCUDA), PRECISION(kFloat)}),
 #endif
+      paddle::lite::Place({TARGET(kX86), PRECISION(kFloat)}),
+      paddle::lite::Place({TARGET(kHost), PRECISION(kAny)}),
   };
 
   LOG(INFO) << "Create EngineManager";
@@ -108,4 +138,5 @@ TEST(EngineManager, manual) {
 }
 
 }  // namespace lite
+}  // namespace inference
 }  // namespace paddle
diff --git a/paddle/fluid/inference/lite/test_predictor.cc b/paddle/fluid/inference/lite/test_predictor.cc
new file mode 100644
index 00000000000000..3ca26a05e52117
--- /dev/null
+++ b/paddle/fluid/inference/lite/test_predictor.cc
@@ -0,0 +1,228 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gtest/gtest.h>
+#include <fstream>
+#include <ios>
+
+#include "lite/api/paddle_use_kernels.h"
+#include "lite/api/paddle_use_ops.h"
+#include "lite/api/paddle_use_passes.h"
+
+#include "paddle/fluid/framework/block_desc.h"
+#include "paddle/fluid/framework/op_desc.h"
+#include "paddle/fluid/framework/program_desc.h"
+#include "paddle/fluid/framework/scope.h"
+
+#include "paddle/fluid/inference/api/paddle_inference_api.h"
+#include "paddle/fluid/operators/lite/ut_helper.h"
+#include "paddle/fluid/platform/enforce.h"
+
+// int main() {
+//  LOG(INFO) << "leaky_relu";
+//  paddle::AnalysisConfig config;
+//  //
+//  config.SetModel("/shixiaowei02/Paddle_lite/xingzhaolong/leaky_relu_model");
+//  config.SetModel("/Paddle/models/lite/leaky_relu");
+//  config.SwitchUseFeedFetchOps(false);
+//  config.EnableUseGpu(10, 0);
+//  config.EnableLiteEngine(paddle::AnalysisConfig::Precision::kFloat32);
+//  config.pass_builder()->TurnOnDebug();
+//
+//  auto predictor = CreatePaddlePredictor(config);
+//  PADDLE_ENFORCE_NOT_NULL(predictor.get());
+//
+//  const int batch_size = 1;
+//  const int channels = 1;
+//  const int height = 3;
+//  const int width = 3;
+//  float data[batch_size * channels * height * width] = {0.5, -0.5, 0,  -0, 1,
+//                                                        -1,  2,    -2, 3};
+//
+//  auto input_names = predictor->GetInputNames();
+//  auto input_t = predictor->GetInputTensor(input_names[0]);
+//  input_t->Reshape({batch_size, channels, height, width});
+//  input_t->copy_from_cpu(data);
+//
+//  CHECK(predictor->ZeroCopyRun());
+//
+//  std::vector<float> out_data;
+//  auto output_names = predictor->GetOutputNames();
+//  auto output_t = predictor->GetOutputTensor(output_names[0]);
+//  std::vector<int> output_shape = output_t->shape();
+//  int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
+//                                std::multiplies<int>());
+//  LOG(INFO) << "out_num is " << out_num;
+//  out_data.resize(out_num);
+//  output_t->copy_to_cpu(out_data.data());
+//  return 0;
+//}
+
+// int main() {
+//
+///*
+//    // for yolov3
+//    LOG(INFO) << "yolo_v3";
+//    paddle::AnalysisConfig config;
+//    config.SetModel("/Paddle/models/lite/yolov3_infer/__model__",
+//    "/Paddle/models/lite/yolov3_infer/__params__");
+//    config.SwitchUseFeedFetchOps(false);
+//    config.EnableUseGpu(10, 3);
+//    config.EnableLiteEngine(paddle::AnalysisConfig::Precision::kFloat32);
+//    config.pass_builder()->TurnOnDebug();
+//
+//    auto predictor = CreatePaddlePredictor(config);
+//    PADDLE_ENFORCE_NOT_NULL(predictor.get());
+//
+//    const int batch_size = 1;
+//    const int channels = 3;
+//    const int height = 608;
+//    const int width = 608;
+//    // float *data = new float[batch_size * channels * height * width];
+//    float data[batch_size * channels * height * width];
+//    memset(data, 0, sizeof(float) * batch_size * channels * height * width);
+//
+//    auto input_names = predictor->GetInputNames();
+//    LOG(INFO) << input_names[0];
+//    LOG(INFO) << input_names[1];
+//    auto input_image = predictor->GetInputTensor(input_names[0]);
+//    input_image->Reshape({batch_size, channels, height, width});
+//    input_image->copy_from_cpu(data);
+//
+//    int im_size_data[2] = {608, 608};
+//    auto input_size = predictor->GetInputTensor(input_names[1]);
+//    input_size->Reshape({1, 2});
+//    input_size->copy_from_cpu(im_size_data);
+//
+//    CHECK(predictor->ZeroCopyRun());
+//
+//    std::vector<float> out_data;
+//    auto output_names = predictor->GetOutputNames();
+//    auto output_t = predictor->GetOutputTensor(output_names[0]);
+//    std::vector<int> output_shape = output_t->shape();
+//    int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
+//                                  std::multiplies<int>());
+//    LOG(INFO) << "out_num is " << out_num;
+//    out_data.resize(out_num);
+//    output_t->copy_to_cpu(out_data.data());
+//    return 0;
+//*/
+//}
+
+namespace paddle {
+namespace lite {
+
+using inference::lite::AddTensorToBlockDesc;
+using inference::lite::CreateTensor;
+using inference::lite::serialize_params;
+
+namespace {
+
+void fake_mul(std::string* model, std::string* param) {
+  framework::ProgramDesc program;
+  auto* block_ = program.Proto()->mutable_blocks(0);
+  framework::BlockDesc block_desc(&program, block_);
+
+  LOG(INFO) << "create feed op";
+  auto* feed0 = block_desc.AppendOp();
+  feed0->SetType("feed");
+  feed0->SetInput("X", {"feed"});
+  feed0->SetOutput("Out", {"x"});
+  feed0->SetAttr("col", 0);
+
+  LOG(INFO) << "create mul op";
+  auto* mul = block_desc.AppendOp();
+  mul->SetType("mul");
+  mul->SetInput("X", std::vector<std::string>({"x"}));
+  mul->SetInput("Y", std::vector<std::string>({"weight"}));
+  mul->SetOutput("Out", std::vector<std::string>({"y"}));
+  mul->SetAttr("y_num_col_dims", 1);
+  mul->SetAttr("x_num_col_dims", 1);
+
+  LOG(INFO) << "create leaky_relu op";
+  auto* leaky = block_desc.AppendOp();
+  leaky->SetType("leaky_relu");
+  leaky->SetInput("X", std::vector<std::string>({"y"}));
+  leaky->SetOutput("Out", std::vector<std::string>({"z"}));
+  leaky->SetAttr("alpha", 0.1f);
+
+  LOG(INFO) << "create fetch op";
+  auto* fetch = block_desc.AppendOp();
+  fetch->SetType("fetch");
+  fetch->SetInput("X", std::vector<std::string>({"z"}));
+  fetch->SetOutput("Out", std::vector<std::string>({"out"}));
+  fetch->SetAttr("col", 0);
+
+  AddTensorToBlockDesc(block_, "x", std::vector<int64_t>({3, 4}));
+  AddTensorToBlockDesc(block_, "weight", std::vector<int64_t>({4, 2}), true);
+  AddTensorToBlockDesc(block_, "y", std::vector<int64_t>({3, 2}));
+  AddTensorToBlockDesc(block_, "z", std::vector<int64_t>({3, 2}));
+  AddTensorToBlockDesc(block_, "out", std::vector<int64_t>({3, 2}));
+  *block_->add_ops() = *feed0->Proto();
+  *block_->add_ops() = *mul->Proto();
+  *block_->add_ops() = *leaky->Proto();
+  *block_->add_ops() = *fetch->Proto();
+  *model = program.Proto()->SerializeAsString();
+
+  framework::Scope scope;
+  CreateTensor(&scope, "weight", std::vector<int64_t>({4, 2}));
+  serialize_params(param, &scope, {"weight"});
+}
+}  // namespace
+
+TEST(paddle_lite_subgraph, predictor) {
+  LOG(INFO) << "fake model";
+#ifdef PADDLE_WITH_CUDA
+  paddle::AnalysisConfig config;
+  std::string model, param = "";
+  fake_mul(&model, &param);
+  config.SetModelBuffer(model.c_str(), model.size(), param.c_str(),
+                        param.size());
+  config.SwitchUseFeedFetchOps(false);
+  config.EnableUseGpu(10, 0);
+  config.EnableLiteEngine(paddle::AnalysisConfig::Precision::kFloat32);
+  config.pass_builder()->TurnOnDebug();
+
+  auto predictor = paddle::CreatePaddlePredictor(config);
+  PADDLE_ENFORCE_NOT_NULL(predictor.get());
+
+  const int height = 3;
+  const int width = 4;
+  float* data = new float[height * width];
+  for (int i = 0; i < height * width; ++i) {
+    data[i] = i;
+  }
+  auto input_names = predictor->GetInputNames();
+  auto input_t = predictor->GetInputTensor(input_names[0]);
+  input_t->Reshape({height, width});
+  input_t->copy_from_cpu(data);
+  LOG(INFO) << "start to run!";
+  CHECK(predictor->ZeroCopyRun());
+  LOG(INFO) << "get output tensor!";
+  std::vector<float> out_data;
+  auto output_names = predictor->GetOutputNames();
+  auto output_t = predictor->GetOutputTensor(output_names[0]);
+  std::vector<int> output_shape = output_t->shape();
+  int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
+                                std::multiplies<int>());
+  LOG(INFO) << "out_num is " << out_num;
+  out_data.resize(out_num);
+  output_t->copy_to_cpu(out_data.data());
+  for (size_t i = 0; i < out_data.size(); ++i) {
+    LOG(INFO) << out_data[i];
+  }
+#endif
+}
+}  // namespace lite
+}  // namespace paddle
diff --git a/paddle/fluid/operators/lite/CMakeLists.txt b/paddle/fluid/operators/lite/CMakeLists.txt
index 5bb7892590848a..3ba9a7d95c268e 100644
--- a/paddle/fluid/operators/lite/CMakeLists.txt
+++ b/paddle/fluid/operators/lite/CMakeLists.txt
@@ -1,2 +1,3 @@
 op_library(lite_engine_op DEPS lite_engine lite_tensor_utils)
-cc_test(test_lite_engine_op SRCS lite_engine_op_test.cc DEPS lite_engine_op analysis)
+#cc_test(test_lite_engine_op SRCS lite_engine_op_test.cc DEPS lite_engine_op analysis)
+cc_test(test_lite_engine_op SRCS lite_engine_op_test.cc DEPS lite_engine_op paddle_fluid)
diff --git a/paddle/fluid/operators/lite/lite_engine_op_test.cc b/paddle/fluid/operators/lite/lite_engine_op_test.cc
index 91c4fec461cf87..b9d3fe734441af 100644
--- a/paddle/fluid/operators/lite/lite_engine_op_test.cc
+++ b/paddle/fluid/operators/lite/lite_engine_op_test.cc
@@ -18,53 +18,38 @@
 #include "paddle/fluid/framework/op_desc.h"
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/framework/scope.h"
-#include "paddle/fluid/inference/utils/singleton.h"
-#include "paddle/fluid/operators/lite/lite_engine_op.h"
-#include "paddle/fluid/operators/lite/ut_helper.h"
 
 #include "lite/api/paddle_use_kernels.h"
 #include "lite/api/paddle_use_ops.h"
 #include "lite/api/paddle_use_passes.h"
+// must be this order
+#include "paddle/fluid/inference/utils/singleton.h"
+#include "paddle/fluid/operators/lite/lite_engine_op.h"
+#include "paddle/fluid/operators/lite/ut_helper.h"
 
 USE_NO_KERNEL_OP(lite_engine)
-namespace paddle {
-namespace operators {
 
-namespace {
-void CreateTensor(framework::Scope* scope, const std::string& name,
-                  const std::vector<int64_t>& shape) {
-  auto* var = scope->Var(name);
-  auto* tensor = var->GetMutable<framework::LoDTensor>();
-  auto dims = framework::make_ddim(shape);
-  tensor->Resize(dims);
-#ifdef PADDLE_WITH_CUDA
-  platform::CUDAPlace place;
-#else
-  platform::CPUPlace place;
-#endif
-  inference::lite::RandomizeTensor(tensor, place);
-}
+using paddle::inference::lite::AddTensorToBlockDesc;
+using paddle::inference::lite::CreateTensor;
+using paddle::inference::lite::serialize_params;
 
-void AddTensorToBlockDesc(framework::proto::BlockDesc* block,
-                          const std::string& name,
-                          const std::vector<int64_t>& shape, bool persistable) {
-  using framework::proto::VarType;
-  auto* var = block->add_vars();
-  framework::VarDesc desc(name);
-  desc.SetType(VarType::LOD_TENSOR);
-  desc.SetDataType(VarType::FP32);
-  desc.SetShape(shape);
-  desc.SetPersistable(persistable);
-  *var = *desc.Proto();
-}
-}  // namespace
+namespace paddle {
+namespace operators {
 
-TEST(LiteEngineOp, manual) {
+TEST(LiteEngineOp, engine_op) {
   framework::ProgramDesc program;
   auto* block_ = program.Proto()->mutable_blocks(0);
-
-  LOG(INFO) << "create block desc";
   framework::BlockDesc block_desc(&program, block_);
+  auto* feed0 = block_desc.AppendOp();
+  feed0->SetType("feed");
+  feed0->SetInput("X", {"feed"});
+  feed0->SetOutput("Out", {"x"});
+  feed0->SetAttr("col", 0);
+  auto* feed1 = block_desc.AppendOp();
+  feed1->SetType("feed");
+  feed1->SetInput("X", {"feed"});
+  feed1->SetOutput("Out", {"y"});
+  feed1->SetAttr("col", 1);
   LOG(INFO) << "create elementwise_add op";
   auto* elt_add = block_desc.AppendOp();
   elt_add->SetType("elementwise_add");
@@ -84,6 +69,8 @@ TEST(LiteEngineOp, manual) {
   AddTensorToBlockDesc(block_, "z", std::vector<int64_t>({2, 4}), false);
   AddTensorToBlockDesc(block_, "out", std::vector<int64_t>({2, 4}), false);
 
+  *block_->add_ops() = *feed1->Proto();
+  *block_->add_ops() = *feed0->Proto();
   *block_->add_ops() = *elt_add->Proto();
   *block_->add_ops() = *fetch->Proto();
 
@@ -98,44 +85,18 @@ TEST(LiteEngineOp, manual) {
   // Prepare variables.
   CreateTensor(&scope, "x", std::vector<int64_t>({2, 4}));
   CreateTensor(&scope, "y", std::vector<int64_t>({2, 4}));
-  CreateTensor(&scope, "z", std::vector<int64_t>({2, 4}));
   CreateTensor(&scope, "out", std::vector<int64_t>({2, 4}));
 
-  ASSERT_EQ(block_->ops_size(), 2);
+  ASSERT_EQ(block_->ops_size(), 4);
 
-  auto serialize_params = [](std::string* str, framework::Scope* scope,
-                             const std::vector<std::string>& params) {
-    std::ostringstream os;
-#ifdef PADDLE_WITH_CUDA
-    platform::CUDAPlace place;
-    platform::CUDADeviceContext ctx(place);
-#else
-    platform::CPUDeviceContext ctx;
-#endif
-    for (const auto& param : params) {
-      PADDLE_ENFORCE_NOT_NULL(scope->FindVar(param),
-                              "Block should already have a '%s' variable",
-                              param);
-      auto* tensor = scope->FindVar(param)->GetMutable<framework::LoDTensor>();
-      framework::SerializeToStream(os, *tensor, ctx);
-    }
-    *str = os.str();
-  };
   std::vector<std::string> repetitive_params{"x", "y"};
   inference::lite::EngineConfig config;
-  config.prefer_place = {
-#ifdef PADDLE_WITH_CUDA
-      TARGET(kCUDA), PRECISION(kFloat),
-#else
-      TARGET(kX86), PRECISION(kFloat)
-#endif
-  };
   config.valid_places = {
-      paddle::lite::Place({TARGET(kHost), PRECISION(kAny)}),
-      paddle::lite::Place({TARGET(kX86), PRECISION(kFloat)}),
 #ifdef PADDLE_WITH_CUDA
       paddle::lite::Place({TARGET(kCUDA), PRECISION(kFloat)}),
 #endif
+      paddle::lite::Place({TARGET(kHost), PRECISION(kAny)}),
+      paddle::lite::Place({TARGET(kX86), PRECISION(kFloat)}),
   };
   serialize_params(&(config.param), &scope, repetitive_params);
   config.model = program.Proto()->SerializeAsString();
@@ -147,6 +108,8 @@ TEST(LiteEngineOp, manual) {
   engine_op_desc.SetOutput("Ys", std::vector<std::string>({"out"}));
   std::string engine_key = "engine_0";
   engine_op_desc.SetAttr("engine_key", engine_key);
+  engine_op_desc.SetAttr("enable_int8", false);
+  engine_op_desc.SetAttr("use_gpu", true);
   engine_op_desc.SetBlockAttr("sub_block", &block_desc);
 
   inference::Singleton<inference::lite::EngineManager>::Global().Create(
@@ -159,6 +122,7 @@ TEST(LiteEngineOp, manual) {
   // Execute them.
   LOG(INFO) << "engine_op run";
   engine_op->Run(scope, place);
+  LOG(INFO) << "done";
 }
 }  // namespace operators
 }  // namespace paddle
diff --git a/paddle/fluid/operators/lite/ut_helper.h b/paddle/fluid/operators/lite/ut_helper.h
index cad8c411b82390..3ec842d13f1279 100644
--- a/paddle/fluid/operators/lite/ut_helper.h
+++ b/paddle/fluid/operators/lite/ut_helper.h
@@ -14,6 +14,8 @@
 #pragma once
 
 #include <gtest/gtest.h>
+#include <string>
+#include <vector>
 #include "paddle/fluid/framework/block_desc.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/op_registry.h"
@@ -24,6 +26,38 @@ namespace paddle {
 namespace inference {
 namespace lite {
 
+void AddTensorToBlockDesc(framework::proto::BlockDesc* block,
+                          const std::string& name,
+                          const std::vector<int64_t>& shape,
+                          bool persistable = false) {
+  using framework::proto::VarType;
+  auto* var = block->add_vars();
+  framework::VarDesc desc(name);
+  desc.SetType(VarType::LOD_TENSOR);
+  desc.SetDataType(VarType::FP32);
+  desc.SetShape(shape);
+  desc.SetPersistable(persistable);
+  *var = *desc.Proto();
+}
+
+void serialize_params(std::string* str, framework::Scope* scope,
+                      const std::vector<std::string>& params) {
+  std::ostringstream os;
+#ifdef PADDLE_WITH_CUDA
+  platform::CUDAPlace place;
+  platform::CUDADeviceContext ctx(place);
+#else
+  platform::CPUDeviceContext ctx;
+#endif
+  for (const auto& param : params) {
+    PADDLE_ENFORCE_NOT_NULL(scope->FindVar(param),
+                            "Block should already have a '%s' variable", param);
+    auto* tensor = scope->FindVar(param)->GetMutable<framework::LoDTensor>();
+    framework::SerializeToStream(os, *tensor, ctx);
+  }
+  *str = os.str();
+}
+
 /*
  * Get a random float value between [low, high]
  */
@@ -47,10 +81,26 @@ void RandomizeTensor(framework::LoDTensor* tensor,
 
   for (size_t i = 0; i < num_elements; i++) {
     *(temp_data + i) = random(0., 1.);
+    LOG(INFO) << "weights: " << *(temp_data + i);
   }
 
   TensorCopySync(temp_tensor, place, tensor);
 }
+
+void CreateTensor(framework::Scope* scope, const std::string& name,
+                  const std::vector<int64_t>& shape) {
+  auto* var = scope->Var(name);
+  auto* tensor = var->GetMutable<framework::LoDTensor>();
+  auto dims = framework::make_ddim(shape);
+  tensor->Resize(dims);
+#ifdef PADDLE_WITH_CUDA
+  platform::CUDAPlace place;
+#else
+  platform::CPUPlace place;
+#endif
+  RandomizeTensor(tensor, place);
+}
+
 }  // namespace lite
 }  // namespace inference
 }  // namespace paddle