Merged

63 commits
0ee0d15
add cast kernel
MingMingShangTian Nov 1, 2021
1ae3fe2
add cast cuda kernel
MingMingShangTian Nov 1, 2021
dc81e06
Merge branch 'develop' into cast_kernel
MingMingShangTian Nov 2, 2021
7cd7966
add cast kernel
MingMingShangTian Nov 2, 2021
a244a0f
merge develop branch
MingMingShangTian Nov 3, 2021
0eaf913
make cast kernel output dtype undefined
MingMingShangTian Nov 8, 2021
dc9b42a
Merge branch 'develop' into cast_kernel
MingMingShangTian Nov 8, 2021
83415da
get cast dtype from vardesc
MingMingShangTian Nov 9, 2021
62cfb21
merge develop branch
MingMingShangTian Nov 9, 2021
b6c3c05
move cast to manipulation and add test case
MingMingShangTian Nov 9, 2021
17913da
add castinfershape
MingMingShangTian Nov 9, 2021
6fbd94d
avoid reinitializing variable
MingMingShangTian Nov 9, 2021
70d4069
InitializeVariable support datatype
MingMingShangTian Nov 10, 2021
c434610
merge develop branch
MingMingShangTian Nov 10, 2021
a5c234e
merge develop branch
MingMingShangTian Nov 10, 2021
821b6e0
fix merge bug
MingMingShangTian Nov 10, 2021
4538134
revert modify initializeVariable
MingMingShangTian Nov 11, 2021
bbc83c4
revert modify on InitializeVariable
MingMingShangTian Nov 11, 2021
1feb022
revert modify on InitializeVariable
MingMingShangTian Nov 11, 2021
6d55883
mutable support reset dtype
MingMingShangTian Nov 11, 2021
7eea4dd
enable make pten tensor from variable when def_arg.type is undefined
MingMingShangTian Nov 11, 2021
51dc272
fix build pten ctx start_idx error
MingMingShangTian Nov 12, 2021
a927e6f
copy pten out tensor to variable
MingMingShangTian Nov 12, 2021
f893438
merge develop branch
MingMingShangTian Nov 12, 2021
9ba67db
merge develop branch
MingMingShangTian Nov 12, 2021
d8ce4c3
fix non pten kernel cast failed
chenwhql Nov 13, 2021
dce29b1
add reset allocation place for remake tensor
chenwhql Nov 13, 2021
b567179
Merge pull request #1 from chenwhql/tianyu_cast_kernel
MingMingShangTian Nov 13, 2021
4b70d76
fix inplace realloc error
chenwhql Nov 13, 2021
38a4e46
Merge pull request #2 from chenwhql/tianyu_cast_kernel
MingMingShangTian Nov 13, 2021
dedd03e
add mutable on pten kernels and remove unused cast files
MingMingShangTian Nov 13, 2021
ca9a284
rename function names
MingMingShangTian Nov 13, 2021
620960b
fix output type error
MingMingShangTian Nov 13, 2021
f4d6cc6
fix conflict with develop branch
MingMingShangTian Nov 14, 2021
acd5649
fix conflict with develop branch
MingMingShangTian Nov 14, 2021
a12a3a1
set data type to variable with pten's dtype
MingMingShangTian Nov 15, 2021
e22707e
merge develop branch
MingMingShangTian Nov 16, 2021
6adacbe
fix test_cast_api type mismatch
MingMingShangTian Nov 16, 2021
9276daa
DenseTensor mutable_data support 0 bytes value
MingMingShangTian Nov 16, 2021
4427386
merge branch develop
MingMingShangTian Nov 16, 2021
dcaa367
fix the inplace bug of reshape kernel
MingMingShangTian Nov 16, 2021
0f6dd13
fix pten.backend != variable.place when moving storage, palce mismatc…
MingMingShangTian Nov 16, 2021
a07cda0
Merge branch 'develop' into cast_kernel
MingMingShangTian Nov 17, 2021
90b05d9
fix conflict with develop branch
MingMingShangTian Nov 17, 2021
b71b964
Fix bug of paddle::experimental::MovesStorage
YuanRisheng Nov 17, 2021
8633603
fix ReMakePtenDenseTensor place mismatch bug
MingMingShangTian Nov 17, 2021
3410498
Revert "fix ReMakePtenDenseTensor place mismatch bug"
MingMingShangTian Nov 18, 2021
6199455
fix ReMakePtenDenseTensor place mismatch bug
MingMingShangTian Nov 18, 2021
c469ffd
reverts the set_lod interface, test=develop
Shixiaowei02 Nov 18, 2021
6d712d0
Merge branch 'xiaowei fix code' into cast_kernel
MingMingShangTian Nov 18, 2021
539ce20
Merge branch 'develop' into cast_kernel
MingMingShangTian Nov 18, 2021
30e057a
modify by the review options
MingMingShangTian Nov 18, 2021
8ca880b
modify error message
MingMingShangTian Nov 18, 2021
55f7cb6
add & for const input arguments
MingMingShangTian Nov 18, 2021
d8873ff
add reference in params
YuanRisheng Nov 18, 2021
7860e78
elementwise_sub add mutable_data
MingMingShangTian Nov 18, 2021
9df1dc4
fix ResetHolderWithType check size bug
MingMingShangTian Nov 19, 2021
37a805d
Merge branch 'develop' into cast_kernel
MingMingShangTian Nov 19, 2021
9a49c51
add dependence pten_tensor to test_cast_api object
MingMingShangTian Nov 19, 2021
9dbfbee
merge develop branch to fix conflict
MingMingShangTian Nov 19, 2021
2a27ce3
remove unused code to pass ci coverage
MingMingShangTian Nov 19, 2021
68aa002
merge pr:37293 Fix bug of paddle::experimental::MovesStorageOD
MingMingShangTian Nov 19, 2021
a112c6d
Merge branch 'develop' into cast_kernel
MingMingShangTian Nov 19, 2021
121 changes: 101 additions & 20 deletions paddle/fluid/framework/operator.cc
@@ -1183,6 +1183,9 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
}
BuildPtenKernelContext(*runtime_ctx, dev_ctx);
(*pt_kernel_)(pt_kernel_context_.get());

WriteBackToOutputs(runtime_ctx);

pt_kernel_context_->ClearData();
} else {
(*kernel_func_)(
@@ -1808,50 +1811,98 @@ void OperatorWithKernel::BuildPtenKernelContext(
for (size_t i = 0; i < input_names.size(); ++i) {
auto& in_def = input_defs.at(i);
auto& ins_vector = ctx.inputs.at(input_names[i]);
if (pt_kernel_context_->InputsSize() <= i) {

// calculate the start and end index of the input tensors
size_t start_idx =
(i == 0 ? 0 : pt_kernel_context_->InputRangeAt(i - 1).second);
size_t end_idx = start_idx + ins_vector.size();

// The current input/output size of pt_kernel_context_ is at least equal to
// start_idx. Because the inputs/outputs already allocated in
// pt_kernel_context_ are reused, the current size can be greater than the
// index the tensor should be set to; in that case
// ReMakePtenDenseTensorFromVar is used to remake the pten tensor in place.
if (pt_kernel_context_->InputsSize() == start_idx) {
paddle::SmallVector<std::shared_ptr<pten::TensorBase>> tmp_inputs;
for (auto* var : ins_vector) {
tmp_inputs.emplace_back(
experimental::MakePtenTensorBaseFromVar(*var, in_def));
}
pt_kernel_context_->EmplaceBackInputs(std::move(tmp_inputs));
} else {
} else if (pt_kernel_context_->InputsSize() > start_idx) {
size_t input_size = pt_kernel_context_->InputsSize();
for (size_t j = 0; j < ins_vector.size(); ++j) {
if (input_size > i + j) {
if (input_size > start_idx + j) {
experimental::ReMakePtenDenseTensorFromVar(
*ins_vector[j], in_def,
pt_kernel_context_->MutableInputAt<pten::DenseTensor>(i + j));
pt_kernel_context_->MutableInputAt<pten::DenseTensor>(start_idx +
j));
// TODO(chentianyu03): enable this code when multi-input kernels are supported
/*
} else {
pt_kernel_context_->EmplaceBackInputWithoutSetRange(
experimental::MakePtenTensorBaseFromVar(*ins_vector[j],
in_def));
*/
}
// TODO(chenweihang): adapt multi-input case later
}
pt_kernel_context_->MutableInputRangeAt(i) =
std::make_pair(i, i + ins_vector.size());
std::make_pair(start_idx, end_idx);
} else {
PADDLE_THROW(platform::errors::PreconditionNotMet(
"Error start index when trying to set new tensor to inputs, start "
"index is `%d`, but current pt_kernel_context_.inputs.size() is "
"`%d`.",
start_idx, pt_kernel_context_->InputsSize()));
}
}

for (size_t i = 0; i < output_names.size(); ++i) {
auto& out_def = output_defs.at(i);
auto& outs_vector = ctx.outputs.at(output_names[i]);
if (pt_kernel_context_->OutputsSize() <= i) {

size_t start_idx =
(i == 0 ? 0 : pt_kernel_context_->OutputRangeAt(i - 1).second);
size_t end_idx = start_idx + outs_vector.size();

// The current input/output size of pt_kernel_context_ is at least equal to
// start_idx. Because the inputs/outputs already allocated in
// pt_kernel_context_ are reused, the current size can be greater than the
// index the tensor should be set to; in that case
// ReMakePtenDenseTensorFromVar is used to remake the pten tensor in place.
if (pt_kernel_context_->OutputsSize() == start_idx) {
paddle::SmallVector<std::shared_ptr<pten::TensorBase>> tmp_outputs;
for (auto* var : outs_vector) {
tmp_outputs.emplace_back(
experimental::MakePtenTensorBaseFromVar(var, out_def));
}
pt_kernel_context_->EmplaceBackOutputs(std::move(tmp_outputs));
} else {
} else if (pt_kernel_context_->OutputsSize() > start_idx) {
size_t output_size = pt_kernel_context_->OutputsSize();
for (size_t j = 0; j < outs_vector.size(); ++j) {
if (output_size > i + j) {
if (output_size > start_idx + j) {
experimental::ReMakePtenDenseTensorFromVar(
outs_vector[j], out_def,
pt_kernel_context_->MutableOutputAt<pten::DenseTensor>(i + j));
pt_kernel_context_->MutableOutputAt<pten::DenseTensor>(start_idx +
j));

// TODO(chentianyu03): enable this code when multi-output kernels are supported
/*
} else {
pt_kernel_context_->EmplaceBackOutputWithoutSetRange(
experimental::MakePtenTensorBaseFromVar(outs_vector[j],
out_def));
*/
}
// TODO(chenweihang): adapt multi-output case later
}
pt_kernel_context_->MutableOutputRangeAt(i) =
std::make_pair(i, i + outs_vector.size());
std::make_pair(start_idx, end_idx);
} else {
PADDLE_THROW(platform::errors::PreconditionNotMet(
"Error start index when trying to set new tensor to inputs, start "
"index is `%d`, but current pt_kernel_context_.outputs.size() is "
"`%d`.",
start_idx, pt_kernel_context_->OutputsSize()));
}
}

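The input and output loops above share the same slot-reuse pattern. As a rough illustration only (the names and types below are simplified stand-ins, not the Paddle API), the bookkeeping amounts to:

// Minimal sketch of the range bookkeeping: append when the context ends
// exactly at start_idx, remake the existing slots in place when it is
// longer, and report an error otherwise.
#include <cstddef>
#include <stdexcept>
#include <utility>
#include <vector>

struct FakeKernelContext {                                     // hypothetical stand-in
  std::vector<int> inputs;                                     // stand-in for pten tensors
  std::vector<std::pair<std::size_t, std::size_t>> ranges;     // per-argument [start, end)

  void EmplaceInputsFor(std::size_t arg_idx, const std::vector<int>& vars) {
    const std::size_t start_idx =
        (arg_idx == 0 ? 0 : ranges[arg_idx - 1].second);
    const std::size_t end_idx = start_idx + vars.size();
    if (inputs.size() == start_idx) {
      // First run: append the tensors and record the range.
      inputs.insert(inputs.end(), vars.begin(), vars.end());
      ranges.emplace_back(start_idx, end_idx);
    } else if (inputs.size() > start_idx) {
      // Later runs: overwrite the slots that were already allocated.
      for (std::size_t j = 0; j < vars.size(); ++j) {
        if (start_idx + j < inputs.size()) {
          inputs[start_idx + j] = vars[j];
        }
        // Growing the context mid-run is left as a TODO in the real code.
      }
      ranges[arg_idx] = std::make_pair(start_idx, end_idx);
    } else {
      throw std::runtime_error("start index past current context size");
    }
  }
};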
@@ -1883,14 +1934,23 @@ void OperatorWithKernel::BuildPtenKernelContext(
} else if (attr_defs[i].type_index == std::type_index(typeid(bool))) {
pt_kernel_context_->EmplaceBackAttr(BOOST_GET_CONST(bool, attr));
} else if (attr_defs[i].type_index ==
std::type_index(typeid(std::vector<int64_t>)) &&
std::type_index(attr.type()) ==
std::type_index(typeid(std::vector<int>))) {
// Emplace Back Attr according to the type of Pten_Kernel args.
const auto& vector_int_attr = BOOST_GET_CONST(std::vector<int>, attr);
const std::vector<int64_t> vector_int64_attr(vector_int_attr.begin(),
vector_int_attr.end());
pt_kernel_context_->EmplaceBackAttr(vector_int64_attr);
std::type_index(typeid(pten::DataType))) {
auto data_type = pten::TransToPtenDataType(
static_cast<framework::proto::VarType::Type>(
BOOST_GET_CONST(int, attr)));
pt_kernel_context_->EmplaceBackAttr(data_type);
} else if (attr_defs[i].type_index ==
std::type_index(typeid(std::vector<int64_t>))) {
if (std::type_index(attr.type()) ==
std::type_index(typeid(std::vector<int>))) {
// Emplace Back Attr according to the type of Pten_Kernel args.
const auto& vector_int_attr = BOOST_GET_CONST(std::vector<int>, attr);
const std::vector<int64_t> vector_int64_attr(vector_int_attr.begin(),
vector_int_attr.end());
pt_kernel_context_->EmplaceBackAttr(vector_int64_attr);
}
// TODO(YuanRisheng) Need support vector<int64_t> attr

} else {
PADDLE_THROW(platform::errors::Unimplemented(
"unsupported cast op attribute `%s` when construct "
@@ -1901,5 +1961,26 @@
}
}

void OperatorWithKernel::WriteBackToOutputs(RuntimeContext* ctx) const {
// auto& input_names = std::get<0>(pt_kernel_signature_->args);
Contributor: If this is an unused comment, it is recommended to remove it in the next PR.

Contributor Author: OK.

// auto& attr_names = std::get<1>(pt_kernel_signature_->args);
auto& output_names = std::get<2>(pt_kernel_signature_->args);

// pt_kernel_context_

for (size_t i = 0; i < output_names.size(); ++i) {
auto& outs_vector = ctx->outputs.at(output_names[i]);

auto& range_pair = pt_kernel_context_->OutputRangeAt(i);
auto pten_outs =
pt_kernel_context_->MutableOutputBetween<pten::DenseTensor>(
range_pair.first, range_pair.second);

for (size_t j = 0; j < pten_outs.size(); ++j) {
experimental::MakeVariableFromPtenTensor(pten_outs[j], outs_vector[j]);
}
}
}

} // namespace framework
} // namespace paddle
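The write-back step added above relies on the same per-argument ranges: for each output name it takes the [start, end) slice of pten tensors out of the kernel context and copies each one onto the matching fluid variable. A rough standalone sketch of that mapping (simplified stand-in types, not the Paddle API):

#include <cstddef>
#include <utility>
#include <vector>

// Stand-ins: double for a pten::DenseTensor, double* for a fluid Variable.
struct FakeKernelContext {
  std::vector<double> outputs;                                   // pten outputs
  std::vector<std::pair<std::size_t, std::size_t>> out_ranges;   // per-argument [start, end)
};

// Stand-in for experimental::MakeVariableFromPtenTensor.
void WriteTensorToVariable(double pten_out, double* fluid_var) {
  *fluid_var = pten_out;
}

void WriteBackToOutputs(const FakeKernelContext& ctx,
                        std::vector<std::vector<double*>>* fluid_outputs) {
  for (std::size_t i = 0; i < fluid_outputs->size(); ++i) {
    const auto& range = ctx.out_ranges[i];
    for (std::size_t j = range.first; j < range.second; ++j) {
      // Each pten output in the range maps to the variable at the same
      // offset within this argument's output list.
      WriteTensorToVariable(ctx.outputs[j],
                            (*fluid_outputs)[i][j - range.first]);
    }
  }
}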
2 changes: 2 additions & 0 deletions paddle/fluid/framework/operator.h
@@ -589,6 +589,8 @@ class OperatorWithKernel : public OperatorBase {
void BuildPtenKernelContext(const RuntimeContext& ctx,
platform::DeviceContext* dev_ctx) const;

void WriteBackToOutputs(RuntimeContext* ctx) const;

protected:
mutable std::unique_ptr<OpKernelType> kernel_type_;
mutable std::unique_ptr<OpKernelFunc> kernel_func_;
6 changes: 4 additions & 2 deletions paddle/fluid/framework/tensor.cc
@@ -204,10 +204,12 @@ void Tensor::ResetHolder(std::shared_ptr<memory::Allocation> holder) {
}

void Tensor::ResetHolderWithType(std::shared_ptr<memory::Allocation> holder,
const proto::VarType::Type type) {
ResetHolder(holder);
const proto::VarType::Type& type) {
type_ = type;
ResetHolder(holder);
}

void Tensor::set_type(const proto::VarType::Type& type) { type_ = type; }

} // namespace framework
} // namespace paddle
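The reorder in ResetHolderWithType (assigning type_ before calling ResetHolder) presumably matters because ResetHolder validates the new holder's size against the tensor's current element type, so the check must see the new dtype. A minimal standalone sketch of the ordering issue (illustrative stand-in names, not the Paddle implementation):

#include <cstddef>
#include <stdexcept>

struct FakeTensor {
  std::size_t numel = 4;        // four elements
  std::size_t elem_size = 4;    // pretend float32 until told otherwise
  std::size_t holder_bytes = 0;

  void ResetHolder(std::size_t bytes) {
    // The size check depends on the *current* element type.
    if (numel * elem_size > bytes) {
      throw std::runtime_error("holder too small for tensor");
    }
    holder_bytes = bytes;
  }

  // Buggy order: the check runs against the old element size, so switching
  // these 4 elements from 4-byte to 8-byte with a 16-byte holder wrongly passes.
  void ResetHolderWithTypeOld(std::size_t bytes, std::size_t new_elem_size) {
    ResetHolder(bytes);
    elem_size = new_elem_size;
  }

  // Fixed order (as in this PR): update the type first, then validate.
  void ResetHolderWithTypeNew(std::size_t bytes, std::size_t new_elem_size) {
    elem_size = new_elem_size;
    ResetHolder(bytes);
  }
};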
4 changes: 3 additions & 1 deletion paddle/fluid/framework/tensor.h
@@ -271,7 +271,9 @@ class Tensor {
void ResetHolder(std::shared_ptr<memory::Allocation> holder);

void ResetHolderWithType(std::shared_ptr<memory::Allocation> holder,
const proto::VarType::Type type);
const proto::VarType::Type& type);

void set_type(const proto::VarType::Type& type);

TensorInplaceVersion& InplaceVersionCounter() {
return *inplace_version_counter_;