84 changes: 5 additions & 79 deletions paddle/fluid/framework/mixed_vector.h
@@ -23,17 +23,15 @@ limitations under the License. */

#include "glog/logging.h"
#include "paddle/fluid/framework/details/cow_ptr.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/utils/none.h"
#include "paddle/utils/optional.h"

namespace paddle {
namespace framework {

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
// Vector<T> implements the std::vector interface, and can get Data or
// MutableData from any place. The data will be synced implicitly inside.
template <typename T>
@@ -213,6 +211,7 @@ class Vector {
};

void CopyToCPU() const {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
// COPY GPU Data To CPU
auto *dev_ctx = static_cast<platform::CUDADeviceContext *>(
platform::DeviceContextPool::Instance().Get(gpu_->place()));
@@ -222,6 +221,7 @@ class Vector {
paddle::memory::Copy(platform::CPUPlace(), dst, CUDAPlace().get(), src,
gpu_memory_size_, stream);
dev_ctx->Wait();
#endif
}

void MutableCPU() {
@@ -261,6 +261,7 @@
}

void CopyCPUDataToCUDA(const platform::Place &place) const {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
Shixiaowei02 (Contributor) commented on Dec 23, 2021:

Suggestion: throw an exception in an #elif branch.
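A minimal sketch of what that suggestion could look like (illustrative only, not part of this PR; it reuses the PADDLE_THROW and platform::errors::Unavailable utilities already visible elsewhere in this diff):

void CopyCPUDataToCUDA(const platform::Place &place) const {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  // ... existing CPU-to-GPU copy logic ...
#else
  // Hypothetical branch: fail loudly instead of silently doing nothing
  // when Paddle is built without CUDA/HIP support.
  PADDLE_THROW(platform::errors::Unavailable(
      "CopyCPUDataToCUDA is not supported in CPU-only builds."));
#endif
}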

void *src = cpu_.data();
gpu_memory_size_ = cpu_.size() * sizeof(T);
gpu_ = memory::Alloc(place, gpu_memory_size_);
Expand All @@ -270,6 +271,7 @@ class Vector {
auto stream = dev_ctx->stream();
paddle::memory::Copy(CUDAPlace().get(), dst, platform::CPUPlace(), src,
gpu_memory_size_, stream);
#endif
}

void ImmutableCPU() const {
@@ -465,81 +467,5 @@ class Vector {
mutable details::COWPtr<VectorData> m_;
};

#else // PADDLE_WITH_CUDA

template <typename T>
class CPUVector : public std::vector<T, std::allocator<T>> {
public:
CPUVector() : std::vector<T>() {}
CPUVector(size_t count, const T &value = T()) // NOLINT
: std::vector<T>(count, value) {}
CPUVector(std::initializer_list<T> init) : std::vector<T>(init) {}
CPUVector(const std::vector<T> &other) : std::vector<T>(other) {} // NOLINT
CPUVector(const CPUVector<T> &other) : std::vector<T>(other) {}
CPUVector(CPUVector<T> &&other) : std::vector<T>(std::move(other)) {}
CPUVector(std::vector<T> &&other) // NOLINT
: std::vector<T>(std::move(other)) {}
CPUVector &operator=(const CPUVector &other) {
this->assign(other.begin(), other.end());
return *this;
}
CPUVector &operator=(const std::vector<T> &other) {
this->assign(other.begin(), other.end());
return *this;
}

friend std::ostream &operator<<(std::ostream &os, const CPUVector<T> &other) {
std::stringstream ss;
for (auto v : other) {
os << v << " ";
}
return os;
}

T &operator[](size_t id) { return this->at(id); }

const T &operator[](size_t id) const { return this->at(id); }

template <typename D>
void Extend(const D &begin, const D &end) {
this->reserve(this->size() + size_t(end - begin));
this->insert(this->end(), begin, end);
}

const T *CUDAData(platform::Place place) const {
PADDLE_THROW(platform::errors::Unavailable(
"Vector::CUDAData() method is not supported in CPU-only version."));
}

T *CUDAMutableData(platform::Place place) {
PADDLE_THROW(platform::errors::Unavailable(
"Vector::CUDAMutableData() method is not supported in CPU-only "
"version."));
}

const T *Data(platform::Place place) const {
PADDLE_ENFORCE_EQ(
platform::is_cpu_place(place), true,
platform::errors::Unavailable(
"Vector::Data() method is not supported when not in CPUPlace."));
return this->data();
}

T *MutableData(platform::Place place) {
PADDLE_ENFORCE_EQ(
platform::is_cpu_place(place), true,
platform::errors::Unavailable("Vector::MutableData() method is not "
"supported when not in CPUPlace."));
return this->data();
}

const void *Handle() const { return static_cast<const void *>(this); }
};

template <typename T>
using Vector = CPUVector<T>;

#endif // PADDLE_WITH_CUDA

}; // namespace framework
} // namespace paddle
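With this change, framework::Vector<T> is defined in every build: the CUDA/HIP-specific paths are compiled out inside its methods instead of being swapped for a separate CPUVector type. A hedged usage sketch, assuming the std::vector-style constructors and the Data()/MutableData() accessors that the removed CPUVector mirrored (the function name is illustrative):

#include "paddle/fluid/framework/mixed_vector.h"

void VectorUsageExample() {
  // Behaves like std::vector on the host.
  paddle::framework::Vector<size_t> vec(4, 0);
  vec[0] = 1;
  // Data() hands out memory for the given Place; any device sync
  // happens implicitly, and the GPU paths vanish in CPU-only builds.
  const size_t *host_ptr = vec.Data(paddle::platform::CPUPlace());
  (void)host_ptr;  // silence unused-variable warnings in this sketch
}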
6 changes: 1 addition & 5 deletions paddle/fluid/operators/filter_by_instag_op.h
@@ -31,13 +31,9 @@ namespace operators {
using Tensor = framework::Tensor;
using SelectedRows = framework::SelectedRows;
using LoDTensor = framework::LoDTensor;
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)

template <typename T>
using Vector = framework::Vector<T>;
#else
template <typename T>
using Vector = framework::CPUVector<T>;
#endif

template <typename T>
class FilterByInstagKernel : public framework::OpKernel<T> {
6 changes: 1 addition & 5 deletions paddle/fluid/operators/shuffle_batch_op.h
@@ -33,13 +33,9 @@ namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)

template <typename T>
using Vector = framework::Vector<T>;
#else
template <typename T>
using Vector = framework::CPUVector<T>;
#endif

template <typename T>
class ShuffleBatchKernel : public framework::OpKernel<T> {
6 changes: 5 additions & 1 deletion paddle/pten/core/CMakeLists.txt
@@ -8,7 +8,11 @@ endif()

cc_library(kernel_factory SRCS kernel_factory.cc DEPS enforce convert_utils)
cc_library(kernel_context SRCS kernel_context.cc DEPS enforce pten_context)

cc_library(tensor_base SRCS tensor_base.cc allocator.cc storage.cc DEPS enforce)
cc_library(tensor_meta SRCS tensor_meta.cc DEPS enforce)

if(WITH_MKLDNN)
add_dependencies(tensor_meta mkldnn)
endif()

cc_library(dense_tensor SRCS dense_tensor.cc DEPS tensor_meta tensor_base)
4 changes: 1 addition & 3 deletions paddle/pten/core/dense_tensor.h
@@ -83,9 +83,7 @@ class DenseTensor : public TensorBase,

/// \brief Returns the lod of the tensor.
/// \return The lod of the tensor.
const std::vector<std::vector<size_t>>& lod() const noexcept {
return meta_.lod;
}
const LoD& lod() const noexcept { return meta_.lod; }

/// \brief Returns the data type of the tensor.
/// \return The data type of the tensor.
2 changes: 1 addition & 1 deletion paddle/pten/core/tensor_meta.cc
@@ -27,7 +27,7 @@ DenseTensorMeta::DenseTensorMeta(DataType dtype,
DenseTensorMeta::DenseTensorMeta(DataType dtype,
const DDim& dims,
DataLayout layout,
const std::vector<std::vector<size_t>>& lod)
const LoD& lod)
: dims(dims), dtype(dtype), layout(layout), lod(lod) {}

bool DenseTensorMeta::valid() const noexcept {
9 changes: 5 additions & 4 deletions paddle/pten/core/tensor_meta.h
@@ -22,15 +22,16 @@ limitations under the License. */

// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/framework/ddim.h"

// Note: mixed_vector includes many headers now. Will LoD be
// used on CUDA devices? Can we use small_vector here?
// #include "paddle/fluid/framework/mixed_vector.h"
// @zhanlve: Rollback to original LoD for now
#include "paddle/fluid/framework/mixed_vector.h"

namespace pten {

using DDim = paddle::framework::DDim;
using LoD = std::vector<std::vector<size_t>>;

using LoD = std::vector<paddle::framework::Vector<size_t>>;
/// \brief The meta data of dense tensor. Take the structure type
/// and use all default operations.
///
@@ -44,7 +45,7 @@ struct DenseTensorMeta {
DenseTensorMeta(DataType dtype,
const DDim& dims,
DataLayout layout,
const std::vector<std::vector<size_t>>& lod);
const LoD& lod);

/// \brief Test whether the metadata is valid. Does not throw exceptions.
/// \return Whether the metadata is valid.
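Because each LoD level is now a framework::Vector<size_t> rather than a std::vector<size_t>, a level can also expose device memory in CUDA/HIP builds. A brief hedged sketch of the implication (host access stays unchanged; the CUDAData accessor is only meaningful on GPU builds):

pten::LoD lod{{0, 2, 5}};  // one level holding offsets 0, 2, 5
// Host access works as before:
size_t first_offset = lod[0][0];
// On a CUDA build, lod[0].CUDAData(gpu_place) could return a
// device-side pointer to the same offsets (hypothetical usage).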
5 changes: 3 additions & 2 deletions paddle/pten/tests/api/test_tensor_utils.cc
@@ -15,6 +15,7 @@ limitations under the License. */
#include "gtest/gtest.h"

#include "paddle/pten/api/lib/utils/tensor_utils.h"
#include "paddle/pten/core/tensor_meta.h"

namespace paddle {
namespace tests {
@@ -30,7 +31,7 @@ TEST(tensor_utils, dense_tensor_to_lod_tensor) {
const DDim dims({2, 1});
const DataType dtype{DataType::FLOAT32};
const DataLayout layout{DataLayout::NCHW};
const std::vector<std::vector<size_t>> lod{{0, 2}};
const pten::LoD lod{{0, 2}};
DenseTensorMeta meta(dtype, dims, layout, lod);

auto alloc =
@@ -46,7 +47,7 @@

CHECK(dense_tensor.lod().size() == lod_tensor.lod().size());
CHECK(dense_tensor.lod()[0] ==
static_cast<std::vector<size_t>>((lod_tensor.lod()[0])));
static_cast<paddle::framework::Vector<size_t>>((lod_tensor.lod()[0])));
CHECK(dense_tensor.dtype() == pten::TransToPtenDataType(lod_tensor.type()));
CHECK(dense_tensor.layout() ==
pten::TransToPtenDataLayout(lod_tensor.layout()));
8 changes: 4 additions & 4 deletions paddle/pten/tests/core/test_dense_tensor.cc
@@ -25,7 +25,7 @@ TEST(dense_tensor, meta) {
const DataType dtype{DataType::INT8};
const DataLayout layout{DataLayout::NHWC};
// TODO(Shixiaowei02): need to check the lod is valid.
const std::vector<std::vector<size_t>> lod{};
const LoD lod{};

DenseTensorMeta meta_0;
CHECK(!meta_0.valid());
@@ -72,7 +72,7 @@ TEST(dense_tensor, ctor) {
const DDim dims({1, 2});
const DataType dtype{DataType::INT8};
const DataLayout layout{DataLayout::NHWC};
const std::vector<std::vector<size_t>> lod{};
const LoD lod{};
DenseTensorMeta meta(dtype, dims, layout, lod);

auto alloc = std::make_shared<FancyAllocator>();
@@ -106,7 +106,7 @@ TEST(dense_tensor, resize) {
const DDim dims({1, 2});
const DataType dtype{DataType::INT8};
const DataLayout layout{DataLayout::NHWC};
const std::vector<std::vector<size_t>> lod{};
const LoD lod{};
DenseTensorMeta meta(dtype, dims, layout, lod);

auto alloc = std::make_shared<FancyAllocator>();
@@ -126,7 +126,7 @@ TEST(dense_tensor, shallow_copy) {
const DDim dims({1, 2});
const DataType dtype{DataType::INT8};
const DataLayout layout{DataLayout::NHWC};
const std::vector<std::vector<size_t>> lod{};
const LoD lod{};
DenseTensorMeta meta(dtype, dims, layout, lod);

auto alloc = std::make_shared<FancyAllocator>();