
Commit 4ec1c2c

Merge branch 'op2func_refactor' into dev/op2func_refactor_
2 parents 864e602 + 1dd0145 commit 4ec1c2c

28 files changed: +512 / -618 lines

paddle/fluid/framework/operator.cc

Lines changed: 92 additions & 226 deletions
Large diffs are not rendered by default.

paddle/fluid/framework/operator.h

Lines changed: 20 additions & 16 deletions
@@ -116,8 +116,6 @@ inline std::string GradOriginalVarName(const std::string& grad_var_name) {
 const Tensor* GetLoDTensorOrSelectedRowsValueFromVar(const Variable& var);
 Tensor* GetMutableLoDTensorOrSelectedRowsValueFromVar(Variable* var);
 
-OpKernelType TransPtKernelKeyToOpKernelType(const pt::KernelKey& kernel_key);
-
 class ExecutionContext;
 class OperatorBase;
 
@@ -534,13 +532,15 @@ class OperatorWithKernel : public OperatorBase {
   }
 
   /* member functions for adapting to tcmpt lib */
-  // TODO(chenweihang): Temporarily as a class method
-  virtual pt::KernelKey ConstructPtKernelKey(
-      const VariableValueMap& inputs, const AttributeMap& attrs,
-      const platform::Place& ctx_place) const;
-
-  virtual pt::KernelContext ConstructPtKernelContext(
-      const RuntimeContext& ctx, const platform::DeviceContext& dev_ctx) const;
+  /** In the Tensor calculation library, the new Kernel adopts a clearer and
+   * more streamlined design. The arguments of the Kernel and the input and
+   * output arguments registered in the original OpMaker do not match in some
+   * cases, so we use map to record the arguments required by the kernel.
+   * When selecting Kernel during Op execution, select the arguments of the
+   * original Op according to the GetExpectedPtKernelArgs returned arguments.
+   */
+  virtual KernelSignature GetExpectedPtKernelArgs(
+      const ExecutionContext& ctx) const;
 
  private:
   void RunImpl(const Scope& scope, const platform::Place& place) const final;
@@ -563,8 +563,9 @@ class OperatorWithKernel : public OperatorBase {
                             const std::vector<std::string>& inplace_vars,
                             const Scope& exec_scope) const;
 
-  void ChooseKernel(const RuntimeContext& ctx, const Scope& scope,
-                    const platform::Place& place) const;
+  OpKernelType InnerGetExpectedKernelType(const ExecutionContext& ctx) const;
+
+  void ChooseKernel(const ExecutionContext& ctx) const;
 
   void HandleComplexGradToRealGrad(const Scope& scope,
                                    RuntimeContext* ctx) const;
@@ -582,8 +583,10 @@ class OperatorWithKernel : public OperatorBase {
                        const std::string& name) const;
 
   /* member functions for adapting to tcmpt lib */
-  void ChoosePtKernel(const RuntimeContext& ctx,
-                      const platform::DeviceContext& dev_ctx) const;
+  void ChoosePtKernel(const ExecutionContext& ctx) const;
+
+  pt::KernelContext BuildPtKernelContext(
+      const RuntimeContext& ctx, const platform::DeviceContext& dev_ctx) const;
 
  protected:
   mutable std::unique_ptr<OpKernelType> kernel_type_;
@@ -595,10 +598,11 @@ class OperatorWithKernel : public OperatorBase {
   mutable bool all_kernels_must_compute_runtime_shape_ = false;
   mutable std::mutex cache_update_mutex_;
   mutable bool enable_cache_transfer_scope_ = false;
-  // TODO(chenweihang): Similar duplicate members are used for new tcmpt lib,
-  // maybe we have better impl methods
+  // NOTE(chenweihang): Similar op members are used to adapt to
+  // new tcmpt kernel, if there is a better design in the future,
+  // we may polish the implementation here
   mutable bool run_pt_kernel_ = false;
-  mutable std::unique_ptr<pt::KernelKey> pt_kernel_key_;
+  mutable std::unique_ptr<KernelSignature> pt_kernel_signature_;
   mutable std::unique_ptr<pt::Kernel> pt_kernel_;
 };

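A note on the new hook above: GetExpectedPtKernelArgs returns a KernelSignature (the alias added in type_defs.h further down), i.e. the kernel name plus the input, attribute, and output argument names to take from the original OpMaker definition. A rough sketch of how an individual operator might override it, for example when it has dispensable inputs that the default proto-based parsing skips; the op name "scale" and its argument names are purely illustrative, not part of this commit:

  // Hypothetical override inside a concrete OperatorWithKernel subclass.
  // KernelSignature is pair(kernel_name, tuple(inputs, attrs, outputs)).
  framework::KernelSignature GetExpectedPtKernelArgs(
      const framework::ExecutionContext& ctx) const override {
    return std::make_pair(
        std::string("scale"),
        std::make_tuple(paddle::SmallVector<std::string>({"X"}),
                        paddle::SmallVector<std::string>({"scale", "bias"}),
                        paddle::SmallVector<std::string>({"Out"})));
  }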
paddle/fluid/framework/tcmpt_utils.cc

Lines changed: 115 additions & 2 deletions
@@ -12,11 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#include <sstream>
+
 #include "paddle/fluid/framework/tcmpt_utils.h"
 
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/selected_rows.h"
 #include "paddle/fluid/framework/variable.h"
+#include "paddle/fluid/string/string_helper.h"
 
 namespace paddle {
 namespace framework {
@@ -62,7 +65,7 @@ std::shared_ptr<pt::DenseTensor> MakeTensorImpl<pt::DenseTensor>(
     proto::VarType::Type type) {
   return MakeTensorImpl<pt::DenseTensor, LoDTensor>(
       tensor, pt::TransToPtBackend(place), pt::TransToPtDataType(type),
-      pt::TransToPtLayout(tensor.layout()));
+      pt::TransToPtDataLayout(tensor.layout()));
 }
 
 template <>
@@ -71,7 +74,7 @@ std::shared_ptr<pt::DenseTensor> MakeTensorImpl<pt::DenseTensor>(
     proto::VarType::Type type) {
   return MakeTensorImpl<pt::DenseTensor, Tensor>(
       tensor, pt::TransToPtBackend(place), pt::TransToPtDataType(type),
-      pt::TransToPtLayout(tensor.layout()));
+      pt::TransToPtDataLayout(tensor.layout()));
 }
 
 std::shared_ptr<tcmpt::TensorBase> InputVariableToPtTensor(
@@ -150,5 +153,115 @@ std::shared_ptr<tcmpt::TensorBase> OutputVariableToPtTensor(
   return nullptr;
 }
 
+OpKernelType TransPtKernelKeyToOpKernelType(const pt::KernelKey& kernel_key) {
+  proto::VarType::Type data_type = pt::TransToProtoVarType(kernel_key.dtype());
+  platform::Place place = pt::TransToFluidPlace(kernel_key.backend());
+  DataLayout data_layout = pt::TransToFluidDataLayout(kernel_key.layout());
+  LibraryType library_type = LibraryType::kPlain;
+  if (kernel_key.backend() == pt::Backend::kMKLDNN) {
+    library_type = LibraryType::kMKLDNN;
+  } else if (kernel_key.backend() == pt::Backend::kCUDNN) {
+    library_type = LibraryType::kCUDNN;
+  } else {
+    // do nothing
+  }
+  // TODO(chenweihang): the customized_type_value is lost
+  return OpKernelType(data_type, place, data_layout, library_type);
+}
+
+pt::KernelKey TransOpKernelTypeToPtKernelKey(const OpKernelType& kernel_type) {
+  pt::Backend backend = pt::TransToPtBackend(kernel_type.place_);
+  if (kernel_type.library_type_ == LibraryType::kMKLDNN) {
+    backend = pt::Backend::kMKLDNN;
+  } else if (kernel_type.library_type_ == LibraryType::kCUDNN) {
+    backend = pt::Backend::kCUDNN;
+  } else {
+    // do
+  }
+  pt::DataLayout layout = pt::TransToPtDataLayout(kernel_type.data_layout_);
+  pt::DataType dtype = pt::TransToPtDataType(kernel_type.data_type_);
+  return pt::KernelKey(backend, layout, dtype);
+}
+
+KernelSignatureMap& KernelSignatureMap::Instance() {
+  static KernelSignatureMap g_kernel_signature_map;
+  return g_kernel_signature_map;
+}
+
+const paddle::SmallVector<std::string>&
+KernelArgsNameMakerByOpProto::GetInputArgsNames() {
+  for (int i = 0; i < op_proto_->inputs_size(); ++i) {
+    auto& in = op_proto_->inputs()[i];
+    auto& in_name = in.name();
+    if ((in.has_extra() && in.extra()) || (in.has_quant() && in.quant())) {
+      VLOG(1) << "Parse PtKernel input: skip extra & quant input - " << in_name;
+      continue;
+    }
+    // If contains dispensable input, we should override the
+    // GetExpectedPtKernelArgs method self
+    if (in.has_dispensable() && in.dispensable()) {
+      VLOG(1) << "Parse PtKernel input: skip dispensable input - " << in_name;
+      continue;
+    }
+    VLOG(1) << "Parse PtKernel input: " << in_name;
+    input_names_.emplace_back(in_name);
+  }
+  return input_names_;
+}
+
+const paddle::SmallVector<std::string>&
+KernelArgsNameMakerByOpProto::GetOutputArgsNames() {
+  for (int i = 0; i < op_proto_->outputs_size(); ++i) {
+    auto& out = op_proto_->outputs()[i];
+    auto& out_name = out.name();
+    // TODO(chenweihang): outputs also need skip some cases
+    VLOG(1) << "Parse PtKernel output: " << out_name;
+    output_names_.emplace_back(out_name);
+  }
+  return output_names_;
+}
+
+const paddle::SmallVector<std::string>&
+KernelArgsNameMakerByOpProto::GetAttrsArgsNames() {
+  for (int i = 0; i < op_proto_->attrs_size(); ++i) {
+    auto& attr = op_proto_->attrs()[i];
+    auto& attr_name = attr.name();
+    if (attr_name == "use_mkldnn" || attr_name == "op_role" ||
+        attr_name == "op_role_var" || attr_name == "op_namescope" ||
+        attr_name == "op_callstack" || attr_name == "op_device") {
+      VLOG(1) << "Parse PtKernel attribute: skip needless attr - " << attr_name;
+      continue;
+    }
+    if ((attr.has_extra() && attr.extra()) ||
+        (attr.has_quant() && attr.quant())) {
+      VLOG(1) << "Parse PtKernel attribute: skip extra & quant attr - "
+              << attr_name;
+      continue;
+    }
+    VLOG(1) << "Parse PtKernel attribute: " << attr_name;
+    attr_names_.emplace_back(attr_name);
+  }
+
+  return attr_names_;
+}
+
+KernelSignature KernelArgsNameMakerByOpProto::GetKernelSignature() {
+  return std::make_pair(
+      op_proto_->type(),
+      std::make_tuple(GetInputArgsNames(), GetAttrsArgsNames(),
+                      GetOutputArgsNames()));
+}
+
+std::string KernelSignatureToString(const KernelSignature& signature) {
+  std::stringstream os;
+  os << "Kernel Signature - name: " << signature.first << "; inputs: "
+     << string::join_strings(std::get<0>(signature.second), ", ")
+     << "; attributes: "
+     << string::join_strings(std::get<1>(signature.second), ", ")
+     << "; outputs: "
+     << string::join_strings(std::get<2>(signature.second), ", ");
+  return os.str();
+}
+
 } // namespace framework
 } // namespace paddle

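The two key-translation helpers added here are intended as near-inverses: backend, layout, and dtype survive a round trip, while LibraryType is folded into the pt::Backend and customized_type_value is dropped (see the TODO in TransPtKernelKeyToOpKernelType). A minimal usage sketch under that assumption; the concrete key values are illustrative only:

  // Illustrative round trip between the fluid and tcmpt kernel key types.
  OpKernelType fluid_type(proto::VarType::FP32, platform::CPUPlace(),
                          DataLayout::kNCHW, LibraryType::kPlain);
  pt::KernelKey pt_key = TransOpKernelTypeToPtKernelKey(fluid_type);
  OpKernelType back = TransPtKernelKeyToOpKernelType(pt_key);
  // back should equal fluid_type except for customized_type_value, which
  // pt::KernelKey cannot represent and therefore resets to the default.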
paddle/fluid/framework/tcmpt_utils.h

Lines changed: 81 additions & 1 deletion
@@ -14,14 +14,25 @@ limitations under the License. */
 
 #pragma once
 
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "paddle/fluid/framework/framework.pb.h"
+#include "paddle/fluid/framework/op_kernel_type.h"
 #include "paddle/fluid/framework/tensor.h"
+#include "paddle/fluid/imperative/type_defs.h"
+#include "paddle/fluid/platform/macros.h"
 #include "paddle/fluid/platform/place.h"
-
 #include "paddle/tcmpt/api/include/core.h"
+#include "paddle/utils/flat_hash_map.h"
+#include "paddle/utils/small_vector.h"
 
 namespace paddle {
 namespace framework {
 
+/* tensor translate */
+
 template <typename PtTensorImplT, typename VariableT>
 std::shared_ptr<PtTensorImplT> MakeTensorImpl(const VariableT& tensor,
                                               pt::Backend backend,
@@ -49,5 +60,74 @@ std::shared_ptr<tcmpt::TensorBase> InputVariableToPtTensor(
 std::shared_ptr<tcmpt::TensorBase> OutputVariableToPtTensor(
     framework::Variable* variable, const pt::TensorArgDef& arg_def);
 
+/* Kernel Key translate */
+
+OpKernelType TransPtKernelKeyToOpKernelType(const pt::KernelKey& kernel_key);
+pt::KernelKey TransOpKernelTypeToPtKernelKey(const OpKernelType& kernel_type);
+
+/* Kernel Args parse */
+
+// TODO(chenweihang): we can generate this map by proto info in compile time
+class KernelSignatureMap {
+ public:
+  static KernelSignatureMap& Instance();
+
+  bool Has(const std::string& op_type) const {
+    return map_.find(op_type) != map_.end();
+  }
+
+  void Insert(const std::string& op_type, const KernelSignature& signature) {
+    if (!Has(op_type)) {
+      map_.insert({op_type, signature});
+    }
+  }
+
+  const KernelSignature* GetNullable(const std::string& op_type) const {
+    auto it = map_.find(op_type);
+    if (it == map_.end()) {
+      return nullptr;
+    } else {
+      return &it->second;
+    }
+  }
+
+ private:
+  KernelSignatureMap() = default;
+  paddle::flat_hash_map<std::string, KernelSignature> map_;
+
+  DISABLE_COPY_AND_ASSIGN(KernelSignatureMap);
+};
+
+class KernelArgsNameMaker {
+ public:
+  virtual ~KernelArgsNameMaker() {}
+  virtual const paddle::SmallVector<std::string>& GetInputArgsNames() = 0;
+  virtual const paddle::SmallVector<std::string>& GetOutputArgsNames() = 0;
+  virtual const paddle::SmallVector<std::string>& GetAttrsArgsNames() = 0;
+};
+
+class KernelArgsNameMakerByOpProto : public KernelArgsNameMaker {
+ public:
+  explicit KernelArgsNameMakerByOpProto(framework::proto::OpProto* op_proto)
+      : op_proto_(op_proto) {}
+
+  ~KernelArgsNameMakerByOpProto() {}
+
+  const paddle::SmallVector<std::string>& GetInputArgsNames() override;
+  const paddle::SmallVector<std::string>& GetOutputArgsNames() override;
+  const paddle::SmallVector<std::string>& GetAttrsArgsNames() override;
+
+  KernelSignature GetKernelSignature();
+
+ private:
+  framework::proto::OpProto* op_proto_;
+
+  paddle::SmallVector<std::string> input_names_;
+  paddle::SmallVector<std::string> output_names_;
+  paddle::SmallVector<std::string> attr_names_;
+};
+
+std::string KernelSignatureToString(const KernelSignature& signature);
+
 } // namespace framework
 } // namespace paddle

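KernelSignatureMap is a process-wide, insert-once cache keyed by op type, so the OpProto parsing done by KernelArgsNameMakerByOpProto only has to run the first time an op type is encountered. A hedged sketch of the intended call pattern; op_proto (a proto::OpProto*) is assumed to come from the surrounding runtime code and is not shown in this commit:

  // Cache the signature on first use, then reuse the cached copy.
  const std::string& op_type = op_proto->type();
  if (!KernelSignatureMap::Instance().Has(op_type)) {
    KernelArgsNameMakerByOpProto maker(op_proto);
    KernelSignatureMap::Instance().Insert(op_type, maker.GetKernelSignature());
  }
  const KernelSignature* sig = KernelSignatureMap::Instance().GetNullable(op_type);
  if (sig != nullptr) {
    VLOG(1) << KernelSignatureToString(*sig);
  }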
paddle/fluid/framework/tcmpt_utils_test.cc

Lines changed: 8 additions & 2 deletions
@@ -49,13 +49,19 @@ TEST(TcmptUtils, VarToPtTensor) {
   auto* data =
       value->mutable_data<int>(make_ddim({1, 1}), paddle::platform::CPUPlace());
   data[0] = 123;
-  auto tensor_def = pt::TensorArgDef(pt::Backend::kCUDA, pt::DataLayout::kNCHW,
+  pt::Backend expect_backend = pt::Backend::kCPU;
+
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+  expect_backend = pt::Backend::kCUDA;
+#endif
+  auto tensor_def = pt::TensorArgDef(expect_backend, pt::DataLayout::kNCHW,
                                      pt::DataType::kINT32);
   // 2. test API
   auto tensor_x = InputVariableToPtTensor(v, tensor_def);
   // 3. check result
-  ASSERT_EQ(tensor_x->backend(), pt::Backend::kCUDA);
+  ASSERT_EQ(tensor_x->backend(), expect_backend);
   ASSERT_EQ(tensor_x->data_type(), pt::DataType::kINT32);
+
 }
 
 } // namespace framework

paddle/fluid/framework/type_defs.h

Lines changed: 10 additions & 0 deletions
@@ -17,11 +17,13 @@ limitations under the License. */
 #include <map>
 #include <memory>
 #include <string>
+#include <tuple>
 #include <unordered_map>
 #include <unordered_set>
 #include <vector>
 #include "paddle/fluid/imperative/type_defs.h"
 #include "paddle/fluid/platform/variant.h"
+#include "paddle/utils/small_vector.h"
 
 namespace paddle {
 namespace framework {
@@ -82,5 +84,13 @@ using InferShapeFN = std::function<void(InferShapeContext*)>;
 using InplacePair = std::unordered_map<std::string, std::string>;
 using InferInplaceOpFN = std::function<InplacePair(bool /*use_cuda*/)>;
 
+// tuple(input_names, attr_names, output_names)
+using KernelArgsTuple = std::tuple<paddle::SmallVector<std::string>,
+                                   paddle::SmallVector<std::string>,
+                                   paddle::SmallVector<std::string>>;
+// TODD(yuanrisheng): impl implicit overload signature, use KernelArgsTuple
+// directly
+using KernelSignature = std::pair<std::string, KernelArgsTuple>;
+
 } // namespace framework
 } // namespace paddle

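For reference, KernelSignature is just the kernel name paired with the three argument-name lists in the order given by the comment above (inputs, attrs, outputs). A minimal constructed example; the op and argument names are made up:

  // Building a KernelSignature by hand, mirroring what
  // KernelArgsNameMakerByOpProto::GetKernelSignature() produces.
  paddle::SmallVector<std::string> inputs = {"X", "Y"};
  paddle::SmallVector<std::string> attrs = {"axis"};
  paddle::SmallVector<std::string> outputs = {"Out"};
  KernelSignature sig(std::string("elementwise_add"),
                      std::make_tuple(inputs, attrs, outputs));
  // sig.first == "elementwise_add"; std::get<0>(sig.second) holds the inputs.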