Skip to content

Commit c49d60e

Browse files
author
wangbingnan03
committed
Debug: extend slice op to support TensorArray inputs.
1 parent dfdd2e8 commit c49d60e

7 files changed

Lines changed: 490 additions & 59 deletions

File tree

lite/kernels/x86/scale_compute.h

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,25 @@ namespace kernels {
2626
namespace x86 {
2727

2828
template <typename T>
29-
void scale_compute(
29+
static void scale_compute(
3030
const T* x, T* out, int size, T scale, T bias, bool bias_before) {
3131
if (bias_before) bias *= scale;
3232
for (int i = 0; i < size; i++) {
3333
out[i] = x[i] * scale + bias;
3434
}
3535
}
3636

37+
template <typename T>
38+
static void print_tensor(lite::Tensor* Tensor) {
39+
std::cout << "*********tensor********" << std::endl;
40+
T* data = Tensor->mutable_data<T>();
41+
std::cout << "Tensor len: " << Tensor->data_size() << std::endl;
42+
for (int j = 0; j < Tensor->data_size() && j < 10; j++) {
43+
std::cout << data[j] << " ";
44+
}
45+
std::cout << std::endl;
46+
}
47+
3748
template <typename T>
3849
class ScaleCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
3950
public:
@@ -49,6 +60,13 @@ class ScaleCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
4960
scale,
5061
bias,
5162
!param.bias_after_scale);
63+
std::cout << "********scale********" << std::endl;
64+
std::cout << "Input: " << std::endl;
65+
print_tensor<T>(param.x);
66+
std::cout << "scale: " << scale << std::endl;
67+
std::cout << "bias: " << bias << std::endl;
68+
std::cout << "Output: " << std::endl;
69+
print_tensor<T>(param.output);
5270
}
5371

5472
virtual ~ScaleCompute() = default;

lite/kernels/x86/slice_compute.cc

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,25 @@ REGISTER_LITE_KERNEL(slice,
3333
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kFloat))})
3434
.Finalize();
3535

36+
// x86 slice kernel variant whose "Input" is a tensor array
// (std::vector<Tensor>, float) instead of a single tensor; alias "array_def".
REGISTER_LITE_KERNEL(slice,
                     kX86,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::x86::SliceCompute<float>,
                     array_def)
    .BindInput("Input",
               {LiteType::GetTensorListTy(TARGET(kX86), PRECISION(kFloat))})
    .BindInput("StartsTensor",
               {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kAny))})
    .BindInput("EndsTensor",
               {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kAny))})
    .BindInput("StartsTensorList",
               {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kAny))})
    .BindInput("EndsTensorList",
               {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kAny))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kFloat))})
    .Finalize();
54+
3655
REGISTER_LITE_KERNEL(slice,
3756
kX86,
3857
kFloat,
@@ -52,6 +71,25 @@ REGISTER_LITE_KERNEL(slice,
5271
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt32))})
5372
.Finalize();
5473

74+
// x86 slice kernel variant for tensor-array input with int32 elements;
// alias "array_int32".
REGISTER_LITE_KERNEL(slice,
                     kX86,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::x86::SliceCompute<int>,
                     array_int32)
    .BindInput("Input",
               {LiteType::GetTensorListTy(TARGET(kX86), PRECISION(kInt32))})
    .BindInput("StartsTensor",
               {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kAny))})
    .BindInput("EndsTensor",
               {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kAny))})
    .BindInput("StartsTensorList",
               {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kAny))})
    .BindInput("EndsTensorList",
               {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kAny))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt32))})
    .Finalize();
92+
5593
REGISTER_LITE_KERNEL(slice,
5694
kX86,
5795
kFloat,
@@ -70,3 +108,22 @@ REGISTER_LITE_KERNEL(slice,
70108
{LiteType::GetTensorTy(TARGET(kX86), PRECISION(kAny))})
71109
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt64))})
72110
.Finalize();
111+
112+
// x86 slice kernel variant for tensor-array input with int64 elements;
// alias "array_int64".
REGISTER_LITE_KERNEL(slice,
                     kX86,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::x86::SliceCompute<int64_t>,
                     array_int64)
    .BindInput("Input",
               {LiteType::GetTensorListTy(TARGET(kX86), PRECISION(kInt64))})
    .BindInput("StartsTensor",
               {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kAny))})
    .BindInput("EndsTensor",
               {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kAny))})
    .BindInput("StartsTensorList",
               {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kAny))})
    .BindInput("EndsTensorList",
               {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kAny))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt64))})
    .Finalize();

lite/kernels/x86/slice_compute.h

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,88 @@ namespace lite {
2828
namespace kernels {
2929
namespace x86 {
3030

31+
// Debug helper: prints each tensor in a tensor array (its length plus the
// first up-to-10 elements). Assumes the elements are float — TODO confirm
// for the int32/int64 kernel variants. Temporary debugging aid.
static void print_tensor_array(const std::vector<lite::Tensor>* XTensorList) {
  std::cout << "*********tensor array********" << std::endl;
  for (size_t i = 0; i < XTensorList->size(); i++) {
    // Const reference: the original copied the whole tensor by value here.
    const auto& tensor = XTensorList->at(i);
    const size_t len = tensor.data_size();
    std::cout << "Tensor " << i << " len: " << len << std::endl;
    // Hoist the data pointer out of the loop; read-only access suffices.
    const float* data = tensor.data<float>();
    for (size_t j = 0; j < len && j < 10; j++) {
      std::cout << data[j] << " ";
    }
    std::cout << std::endl;
  }
}
42+
43+
static void print_tensor(lite::Tensor* Tensor) {
44+
std::cout << "*********tensor********" << std::endl;
45+
float* data = Tensor->mutable_data<float>();
46+
std::cout << "Tensor len: " << Tensor->data_size() << std::endl;
47+
for (int j = 0; j < Tensor->data_size() && j < 10; j++) {
48+
std::cout << data[j] << " ";
49+
}
50+
std::cout << std::endl;
51+
}
52+
53+
// Slices an input tensor array over [start, end) along the array dimension.
// When |out_is_array| is true the selected tensors are copied into
// |OutTensorList|; otherwise the single tensor at |start| is copied into
// |Out|. Negative starts/ends are interpreted Python-style (offset from the
// array size).
// Marked `static`: a non-inline function definition in a header would
// violate the ODR once this header is included from more than one
// translation unit (the sibling print helpers here are `static` too).
static void DealTensorArray(const std::vector<lite::Tensor>* XTensorList,
                            std::vector<lite::Tensor>* OutTensorList,
                            lite::Tensor* Out,
                            const std::vector<int>& starts,
                            const std::vector<int>& ends,
                            bool out_is_array) {
  // Guard before reading starts[0]/ends[0].
  CHECK(!starts.empty()) << "starts must not be empty for tensor-array slice";
  CHECK(!ends.empty()) << "ends must not be empty for tensor-array slice";
  auto in_array = XTensorList;
  // If the input is LoDTensorArray, the rank of input is 1.
  const int64_t in_size = static_cast<int64_t>(in_array->size());
  int64_t start = starts[0] < 0 ? (starts[0] + in_size) : starts[0];
  int64_t end = ends[0] < 0 ? (ends[0] + in_size) : ends[0];

  // Clamp the slice bounds into [0, in_size].
  start = std::max(start, static_cast<int64_t>(0));
  end = std::max(end, static_cast<int64_t>(0));
  end = std::min(end, in_size);

  CHECK_GT(end, start) << "end should greater than start";
  const int64_t out_size = end - start;

  // TODO: temporary debug output — remove before release.
  std::cout << "starts: " << std::endl;
  for (size_t i = 0; i < starts.size(); i++) {
    std::cout << starts[i] << " ";
  }
  std::cout << std::endl;
  std::cout << "ends: " << std::endl;
  for (size_t i = 0; i < ends.size(); i++) {
    std::cout << ends[i] << " ";
  }
  std::cout << std::endl;

  if (out_is_array) {
    auto out_array = OutTensorList;
    out_array->resize(out_size);
    for (int64_t i = 0; i < out_size; ++i) {
      auto* out_tensor = &out_array->at(i);
      // Const reference: the original copied the whole tensor by value on
      // every iteration before CopyDataFrom.
      const auto& in_tensor = in_array->at(i + start);
      out_tensor->set_lod(in_tensor.lod());
      if (in_tensor.memory_size() > 0) {
        out_tensor->CopyDataFrom(in_tensor);
      } else {
        VLOG(4) << "WARNING: The input tensor 'x_tensor' holds no memory, so "
                   "nothing has been written to output array["
                << i << "].";
      }
    }
  } else {
    auto out_tensor = Out;
    const auto& in_tensor = in_array->at(start);
    out_tensor->CopyDataFrom(in_tensor);
  }
  // TODO: temporary debug output — remove before release.
  std::cout << "input array:" << std::endl;
  print_tensor_array(XTensorList);
  if (out_is_array) {
    std::cout << "out array:" << std::endl;
    print_tensor_array(OutTensorList);
  } else {
    print_tensor(Out);
  }
}
112+
31113
inline std::vector<int> GetIntDataFromTensorList(
32114
const std::vector<lite::Tensor*>& list_tensor) {
33115
std::vector<int> vec_data;
@@ -219,6 +301,8 @@ void slice_compute(const lite::Tensor* in,
219301
template <class T>
220302
void slice_compute_(const lite::Tensor* Input,
221303
lite::Tensor* Out,
304+
const std::vector<lite::Tensor>* XTensorList,
305+
std::vector<lite::Tensor>* OutTensorList,
222306
std::vector<int> axes,
223307
std::vector<int> starts,
224308
std::vector<int> ends,
@@ -228,6 +312,38 @@ void slice_compute_(const lite::Tensor* Input,
228312
std::vector<lite::Tensor*> StartsTensorList,
229313
std::vector<lite::Tensor*> EndsTensorList,
230314
std::vector<int> infer_flags) {
315+
if (Input == nullptr && XTensorList != nullptr) {
316+
bool need_infer = false;
317+
if (StartsTensor || EndsTensor) {
318+
need_infer = true;
319+
}
320+
if (StartsTensorList.size() > 0 || EndsTensorList.size() > 0) {
321+
need_infer = true;
322+
}
323+
if (need_infer) {
324+
if (StartsTensor) {
325+
starts = GetIntDataFromTensor(StartsTensor);
326+
} else if (StartsTensorList.size() > 0) {
327+
starts = GetIntDataFromTensorList(StartsTensorList);
328+
}
329+
CHECK_EQ(starts.size(), axes.size())
330+
<< "The size of starts must be equal to the size of axes.";
331+
if (EndsTensor) {
332+
ends = GetIntDataFromTensor(EndsTensor);
333+
} else if (EndsTensorList.size() > 0) {
334+
ends = GetIntDataFromTensorList(EndsTensorList);
335+
}
336+
CHECK_EQ(ends.size(), axes.size())
337+
<< "The size of starts must be equal to the size of axes.";
338+
}
339+
DealTensorArray(XTensorList,
340+
OutTensorList,
341+
Out,
342+
starts,
343+
ends,
344+
(Out == nullptr && OutTensorList != nullptr));
345+
return;
346+
}
231347
int rank = Input->dims().size();
232348
switch (rank) {
233349
case 1:
@@ -320,6 +436,8 @@ class SliceCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
320436
auto& param = *param_.get_mutable<param_t>();
321437
slice_compute_<T>(param.X,
322438
param.Out,
439+
param.XTensorList,
440+
param.OutTensorList,
323441
param.axes,
324442
param.starts,
325443
param.ends,

lite/kernels/xpu/scale_compute.cc

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,23 @@ namespace lite {
2121
namespace kernels {
2222
namespace xpu {
2323

24+
template <typename T>
25+
static void print_tensor(lite::Tensor* Tensor) {
26+
std::cout << "*********tensor********" << std::endl;
27+
lite::Tensor tmp;
28+
tmp.Resize(Tensor->dims());
29+
TargetWrapperXPU::MemcpySync(tmp.mutable_data<T>(),
30+
Tensor->raw_data(),
31+
Tensor->memory_size(),
32+
IoDirection::DtoH);
33+
T* data = tmp.mutable_data<T>();
34+
std::cout << "Tensor len: " << tmp.data_size() << std::endl;
35+
for (int j = 0; j < tmp.data_size() && j < 10; j++) {
36+
std::cout << data[j] << " ";
37+
}
38+
std::cout << std::endl;
39+
}
40+
2441
template <typename T>
2542
void ScaleCompute<T>::Run() {
2643
auto& param = this->template Param<param_t>();
@@ -45,6 +62,13 @@ void ScaleCompute<T>::Run() {
4562
if (!param.x->lod().empty()) {
4663
param.output->set_lod(param.x->lod());
4764
}
65+
std::cout << "********scale********" << std::endl;
66+
std::cout << "Input: " << std::endl;
67+
print_tensor<T>(param.x);
68+
std::cout << "scale: " << param.scale << std::endl;
69+
std::cout << "bias: " << param.bias << std::endl;
70+
std::cout << "Output: " << std::endl;
71+
print_tensor<T>(param.output);
4872
}
4973

5074
} // namespace xpu
@@ -71,3 +95,13 @@ REGISTER_LITE_KERNEL(scale,
7195
.BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt32))})
7296
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt32))})
7397
.Finalize();
98+
99+
// XPU scale kernel variant for int64 tensors; alias "int64" (parallel to the
// int32 registration above).
REGISTER_LITE_KERNEL(scale,
                     kXPU,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::xpu::ScaleCompute<int64_t>,
                     int64)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt64))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt64))})
    .Finalize();

0 commit comments

Comments
 (0)