PaddlePaddle
diff --git a/‎paddle/fluid/extension/include/ext_tensor.h‎
Lines changed: 11 additions & 1 deletion b/‎paddle/fluid/extension/include/ext_tensor.h‎
Lines changed: 11 additions & 1 deletion
diff --git a/‎paddle/fluid/extension/src/ext_tensor.cc‎
Lines changed: 25 additions & 0 deletions b/‎paddle/fluid/extension/src/ext_tensor.cc‎
Lines changed: 25 additions & 0 deletions
diff --git a/‎paddle/fluid/framework/CMakeLists.txt‎
Lines changed: 1 addition & 1 deletion b/‎paddle/fluid/framework/CMakeLists.txt‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎paddle/fluid/framework/custom_tensor_test.cc‎
Lines changed: 37 additions & 0 deletions b/‎paddle/fluid/framework/custom_tensor_test.cc‎
Lines changed: 37 additions & 0 deletions
diff --git a/‎paddle/fluid/framework/framework.proto‎
Lines changed: 17 additions & 0 deletions b/‎paddle/fluid/framework/framework.proto‎
Lines changed: 17 additions & 0 deletions
diff --git a/‎paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc‎
Lines changed: 37 additions & 0 deletions b/‎paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc‎
Lines changed: 37 additions & 0 deletions
diff --git a/‎paddle/fluid/framework/process_mesh_desc.cc‎
Lines changed: 61 additions & 0 deletions b/‎paddle/fluid/framework/process_mesh_desc.cc‎
Lines changed: 61 additions & 0 deletions
diff --git a/‎paddle/fluid/framework/process_mesh_desc.h‎
Lines changed: 65 additions & 0 deletions b/‎paddle/fluid/framework/process_mesh_desc.h‎
Lines changed: 65 additions & 0 deletions
diff --git a/‎paddle/fluid/framework/proto_desc.h‎
Lines changed: 8 additions & 0 deletions b/‎paddle/fluid/framework/proto_desc.h‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎paddle/fluid/framework/var_desc.cc‎
Lines changed: 40 additions & 0 deletions b/‎paddle/fluid/framework/var_desc.cc‎
Lines changed: 40 additions & 0 deletions
@@ -88,10 +88,20 @@ class PD_DLL_DECL Tensor {
   /// It's usually used to set the input tensor data.
   /// \param PlaceType of target place, of which
   /// the tensor will copy to.
-
   template <typename T>
   Tensor copy_to(const PlaceType& place) const;
 
+  /// \brief Return a sub-tensor of the given tensor.
+  /// The slice is taken along the first dimension (rows) and shares its
+  /// data with the original tensor, so writing through the returned
+  /// tensor also modifies the data of the original tensor.
+  /// \param begin_idx The index of the start row (inclusive) to slice.
+  ///                  The index number begins from 0.
+  /// \param end_idx  The index of the end row (exclusive) to slice.
+  ///                 The smallest meaningful value is begin_idx + 1.
+  /// \return The sliced tensor.
+  Tensor slice(const int64_t begin_idx, const int64_t end_idx) const;
+
   /// \brief Return the shape of the Tensor.
   std::vector<int64_t> shape() const;
 
 
@@ -124,6 +124,21 @@ void DeviceCopy(T *src, T *dst, PlaceType src_plc, PlaceType dst_plc,
   }                                                     \
   auto *tensor = static_cast<framework::LoDTensor *>(tensor_.get());
 
+// Declares a local `platform::Place place` that corresponds to the member
+// `place_`. Any PlaceType other than kCPU / kGPU raises an Unavailable error.
+// NOTE(review): kGPU maps to a default-constructed CUDAPlace; confirm which
+// device id that selects.
+#define GET_INNER_PLACE                                \
+  platform::Place place;                               \
+  if (place_ == PlaceType::kCPU) {                     \
+    place = platform::CPUPlace();                      \
+  } else if (place_ == PlaceType::kGPU) {              \
+    place = platform::CUDAPlace();                     \
+  } else {                                             \
+    PADDLE_THROW(platform::errors::Unavailable(        \
+        "Custom operator unsupported place id(%d)",    \
+        static_cast<int>(place_)));                    \
+  }
+
 void Tensor::reshape(const std::vector<int64_t> &shape) {
   GET_CASTED_TENSOR
   auto new_dim = framework::make_ddim(shape);
@@ -257,6 +272,16 @@ Tensor Tensor::copy_to(const PlaceType &target_place) const {
   return target;
 }
 
+// Returns a tensor covering rows [begin_idx, end_idx) of this tensor.
+// The result is created with the same PlaceType and receives its data from
+// the sliced inner tensor through CustomTensorUtils::ShareDataFrom.
+Tensor Tensor::slice(const int64_t begin_idx, const int64_t end_idx) const {
+  GET_CASTED_TENSOR
+  // Raises for an unsupported place_ before any slicing is attempted.
+  GET_INNER_PLACE
+  framework::Tensor sliced_inner = tensor->Slice(begin_idx, end_idx);
+  Tensor result(place_);
+  const void *sliced_handle = static_cast<const void *>(&sliced_inner);
+  framework::CustomTensorUtils::ShareDataFrom(sliced_handle, result);
+  return result;
+}
+
 template PD_DLL_DECL Tensor
 Tensor::copy_to<float>(const PlaceType &target_place) const;
 template PD_DLL_DECL Tensor
 
@@ -202,7 +202,7 @@ cc_test(operator_exception_test SRCS operator_exception_test.cc DEPS operator op
 cc_library(version SRCS version.cc)
 cc_test(version_test SRCS version_test.cc DEPS version)
 
-cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS attribute shape_inference op_info operator glog version)
+cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc process_mesh_desc.cc DEPS attribute shape_inference op_info operator glog version)
 
 cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog proto_desc)
 
 
@@ -92,6 +92,41 @@ void TestAPISizeAndShape() {
   CHECK(t1.shape() == tensor_shape);
 }
 
+// Checks the shapes produced by Tensor::slice on every available place and
+// verifies that writes through a slice are visible in the source tensor.
+void TestAPISlice() {
+  std::vector<int64_t> full_2d = {5, 5};
+  std::vector<int64_t> three_rows_2d = {3, 5};
+  std::vector<int64_t> full_3d = {5, 5, 5};
+  std::vector<int64_t> one_row_3d = {1, 5, 5};
+
+  // Runs the same shape assertions for a tensor allocated on `place`.
+  auto check_slice_shapes = [&](paddle::PlaceType place) {
+    auto matrix = paddle::Tensor(place, full_2d);
+    matrix.mutable_data<float>();
+    CHECK(matrix.slice(0, 5).shape() == full_2d);
+    CHECK(matrix.slice(0, 3).shape() == three_rows_2d);
+    auto cube = paddle::Tensor(place, full_3d);
+    cube.mutable_data<float>();
+    CHECK(cube.slice(4, 5).shape() == one_row_3d);
+  };
+#ifdef PADDLE_WITH_CUDA
+  check_slice_shapes(paddle::PlaceType::kGPU);
+#endif
+  check_slice_shapes(paddle::PlaceType::kCPU);
+
+  // Writing through a slice must be observable in the original tensor.
+  auto source = InitCPUTensorForTest<float>();
+  auto first_row = source.slice(0, 1);
+  auto* first_row_data = first_row.mutable_data<float>();
+  const auto first_row_numel = first_row.size();
+  for (int64_t i = 0; i < first_row_numel; ++i) {
+    first_row_data[i] += static_cast<float>(5);
+  }
+  auto* source_data = source.mutable_data<float>();
+  for (int64_t i = 0; i < first_row_numel; ++i) {
+    CHECK_EQ(source_data[i], static_cast<float>(10));
+  }
+}
+
 template <typename T>
 paddle::DataType TestDtype() {
   std::vector<int64_t> tensor_shape = {5, 5};
@@ -261,6 +296,8 @@ TEST(CustomTensor, copyTest) {
   TestAPISizeAndShape();
   VLOG(2) << "TestPlace";
   TestAPIPlace();
+  VLOG(2) << "TestSlice";
+  TestAPISlice();
   VLOG(2) << "TestCast";
   GroupTestCast();
   VLOG(2) << "TestDtypeConvert";
 
@@ -38,6 +38,13 @@ enum AttrType {
   FLOAT64S = 12;
 }
 
+message ProcessMeshDesc {
+  required int32 id = 1;  // Identifier of this process mesh.
+  required int32 parent_id = 2;  // Identifier of the parent process mesh.
+  repeated int32 topology = 3;  // NOTE(review): presumably the mesh extent per dimension; confirm.
+  repeated int32 process_group = 4;  // NOTE(review): presumably the ids of the member processes; confirm.
+};
+
 // OpDesc describes an instance of a C++ framework::OperatorBase
 // derived class type.
 message OpDesc {
@@ -167,6 +174,15 @@ message VarType {
 }
 
 message VarDesc {
+
+  message Attr {
+    required string name = 1;  // Attribute name.
+    required AttrType type = 2;  // Kind of value carried by this attribute.
+    optional int32 i = 3;  // NOTE(review): presumably the value when type is INT; confirm.
+    optional string s = 4;  // NOTE(review): presumably the value when type is STRING; confirm.
+    repeated int32 ints = 5;  // NOTE(review): presumably the value when type is INTS; confirm.
+  };
+
   required string name = 1;
   required VarType type = 2;
   optional bool persistable = 3 [ default = false ];
@@ -175,6 +191,7 @@ message VarDesc {
   optional bool need_check_feed = 4 [ default = false ];
   optional bool is_parameter = 5 [ default = false ];
   optional bool stop_gradient = 6 [ default = false ];
+  repeated Attr attrs = 7;
 }
 
 message BlockDesc {
 
@@ -153,6 +153,43 @@ QuantDequantFusePass::QuantDequantFusePass() {
       .AddAttr("data_format")
       .IsStringIn({"NCHW", "NHWC", "AnyLayout"})
       .End();
+  AddOpCompat(OpCompat("depthwise_conv2d"))
+      .AddInput("Input")
+      .IsTensor()
+      .End()
+      .AddInput("Filter")
+      .IsTensor()
+      .End()
+      .AddInput("Bias")
+      .IsTensor()
+      .IsOptional()
+      .End()
+      .AddInput("ResidualData")
+      .IsTensor()
+      .IsOptional()
+      .End()
+      .AddOutput("Output")
+      .IsTensor()
+      .End()
+      .AddAttr("strides")
+      .IsType<std::vector<int>>()
+      .End()
+      .AddAttr("paddings")
+      .IsType<std::vector<int>>()
+      .End()
+      .AddAttr("padding_algorithm")
+      .IsOptional()
+      .IsStringIn({"EXPLICIT", "SAME", "VALID"})
+      .End()
+      .AddAttr("groups")
+      .IsNumGE(1)
+      .End()
+      .AddAttr("dilations")
+      .IsType<std::vector<int>>()
+      .End()
+      .AddAttr("data_format")
+      .IsStringIn({"NCHW", "NHWC", "AnyLayout"})
+      .End();
   AddOpCompat(OpCompat("mul"))
       .AddInput("X")
       .IsTensor()
 
@@ -0,0 +1,61 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/framework/process_mesh_desc.h"
+
+namespace paddle {
+namespace framework {
+
+// Id counter; the constructor pre-increments it to obtain each new mesh id.
+int32_t ProcessMeshDesc::next_id = -1;
+
+// Fills the proto description from the given topology and process group,
+// assigns it the next id, and registers this object in the global
+// ProcessMeshDescMap under that id.
+ProcessMeshDesc::ProcessMeshDesc(const std::vector<int32_t> &topo,
+                                 const std::vector<int32_t> &process_group,
+                                 int32_t parent_id) {
+  const int32_t mesh_id = ++next_id;
+  desc_.set_id(mesh_id);
+  desc_.set_parent_id(parent_id);
+  for (const int32_t extent : topo) {
+    desc_.add_topology(extent);
+  }
+  for (const int32_t process : process_group) {
+    desc_.add_process_group(process);
+  }
+  // The map keeps a raw pointer to this object.
+  ProcessMeshDescMap::GetInstance().Insert(mesh_id, this);
+}
+
+// Returns a copy of the `topology` entries stored in the proto, in order.
+std::vector<int32_t> ProcessMeshDesc::Topology() const {
+  const int entry_count = desc_.topology_size();
+  std::vector<int32_t> topology;
+  topology.reserve(entry_count);
+  for (int idx = 0; idx < entry_count; ++idx) {
+    topology.push_back(desc_.topology(idx));
+  }
+  return topology;
+}
+
+// Returns a copy of the `process_group` entries stored in the proto, in order.
+std::vector<int32_t> ProcessMeshDesc::ProcessGroup() const {
+  const int entry_count = desc_.process_group_size();
+  std::vector<int32_t> processes;
+  processes.reserve(entry_count);
+  for (int idx = 0; idx < entry_count; ++idx) {
+    processes.push_back(desc_.process_group(idx));
+  }
+  return processes;
+}
+
+// Returns the single ProcessMeshDescMap, constructed on first use.
+ProcessMeshDescMap &ProcessMeshDescMap::GetInstance() {
+  static ProcessMeshDescMap instance;
+  return instance;
+}
+
+}  // namespace framework
+}  // namespace paddle
@@ -0,0 +1,65 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <unordered_map>
+#include <vector>
+
+#include "paddle/fluid/framework/framework.pb.h"
+#include "paddle/fluid/framework/proto_desc.h"
+#include "paddle/fluid/platform/enforce.h"
+#include "paddle/fluid/platform/macros.h"
+
+namespace paddle {
+namespace framework {
+
+class ProcessMeshDesc {
+ public:
+  ProcessMeshDesc(const std::vector<int32_t>& topo,
+                  const std::vector<int32_t>& process_group, int32_t parent_id);
+
+  int32_t ID() const { return desc_.id(); }  // id stored in the proto
+  int32_t Parent() const { return desc_.parent_id(); }  // parent id stored in the proto
+
+  std::vector<int32_t> Topology() const;  // copy of the proto's topology entries
+  std::vector<int32_t> ProcessGroup() const;  // copy of the proto's process_group entries
+
+  static int32_t next_id;  // pre-incremented by the constructor to obtain each new id
+
+ private:
+  proto::ProcessMeshDesc desc_;  // held by value, so owned by this object
+};
+
+// Registry that maps a process mesh index to its ProcessMeshDesc.
+// Entries are stored as raw pointers.
+class ProcessMeshDescMap {
+ public:
+  // Accessor for the shared instance.
+  static ProcessMeshDescMap& GetInstance();
+
+  // True when an entry is registered under `index`.
+  bool Has(int32_t index) const { return map_.count(index) != 0; }
+
+  // Registers `mesh` under `index`; raises AlreadyExists when `index` is
+  // already in use.
+  void Insert(int32_t index, ProcessMeshDesc* mesh) {
+    PADDLE_ENFORCE_NE(
+        Has(index), true,
+        platform::errors::AlreadyExists("Index (%d) has been used.", index));
+    map_.emplace(index, mesh);
+  }
+
+ private:
+  // Construction is restricted to GetInstance().
+  ProcessMeshDescMap() = default;
+  // Use raw pointer to avoid double free
+  std::unordered_map<int32_t, ProcessMeshDesc*> map_;
+  DISABLE_COPY_AND_ASSIGN(ProcessMeshDescMap);
+};
+}  // namespace framework
+}  // namespace paddle
@@ -22,5 +22,13 @@ constexpr int kRootBlockIndex = 0;
 // The Parent Index of root Block, this block does not exist.
 constexpr int kNoneBlockIndex = -1;
 
+// The Parent Index of root ProcessMesh, this ProcessMesh does not exist.
+constexpr int kNoneProcessMeshIndex = -1;
+
+// If an attribute name has a certain suffix, it means that the
+// attribute is a distributed-related attribute for auto parallel.
+// e.g., "mesh_id@PARALLEL".
+constexpr char kAutoParallelSuffix[] = "@PARALLEL";
+
 }  // namespace framework
 }  // namespace paddle
@@ -280,6 +280,46 @@ std::vector<proto::VarType::TensorDesc *> VarDesc::mutable_tensor_descs() {
   }
 }
 
+// Returns the names of all attributes currently stored in attrs_, in the
+// iteration order of that container.
+std::vector<std::string> VarDesc::AttrNames() const {
+  std::vector<std::string> names;
+  names.reserve(attrs_.size());
+  for (const auto &name_and_value : attrs_) {
+    names.emplace_back(name_and_value.first);
+  }
+  return names;
+}
+
+// Erases the attribute stored under `name` from attrs_.
+void VarDesc::RemoveAttr(const std::string &name) {
+  attrs_.erase(name);
+}
+
+// Stores `v` as the attribute `name`. Only int, string and list-of-int
+// values are accepted; any other type raises InvalidArgument.
+void VarDesc::SetAttr(const std::string &name, const Attribute &v) {
+  const auto attr_type = static_cast<proto::AttrType>(v.which() - 1);
+  const bool is_ints = attr_type == proto::AttrType::INTS;
+
+  // NOTICE(sandyhouse): pybind11 will take the empty list in python as
+  // the std::vector<int> type in C++; so we have to change the attr's type
+  // here if we meet this issue
+  if (is_ints && BOOST_GET_CONST(std::vector<int>, v).empty()) {
+    // Find current attr via attr name and set the correct attribute value
+    attrs_[name] = std::vector<int>();
+    return;
+  }
+
+  bool valid = is_ints || attr_type == proto::AttrType::INT ||
+               attr_type == proto::AttrType::STRING;
+  PADDLE_ENFORCE_EQ(valid, true, platform::errors::InvalidArgument(
+                                     "The value for attr (%s) must be "
+                                     "one of list or int or string.",
+                                     name));
+
+  attrs_[name] = v;
+}
+
+// Returns a copy of the attribute stored under `name`; raises NotFound when
+// no such attribute exists.
+Attribute VarDesc::GetAttr(const std::string &name) const {
+  const auto it = attrs_.find(name);
+  PADDLE_ENFORCE_NE(it, attrs_.end(), platform::errors::NotFound(
+                                          "Attribute %s is not found.", name));
+  return it->second;
+}
+
 bool operator==(const VarDesc &left, const VarDesc &right) {
   return left.Proto()->SerializeAsString() ==
          right.Proto()->SerializeAsString();