84 changes: 5 additions & 79 deletions paddle/fluid/framework/mixed_vector.h
@@ -23,17 +23,15 @@ limitations under the License. */

#include "glog/logging.h"
#include "paddle/fluid/framework/details/cow_ptr.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/utils/none.h"
#include "paddle/utils/optional.h"

namespace paddle {
namespace framework {

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
// Vector<T> implements the std::vector interface, and can get Data or
// MutableData from any place. The data will be synced implicitly inside.
template <typename T>
@@ -213,6 +211,7 @@ class Vector {
};

void CopyToCPU() const {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
// COPY GPU Data To CPU
auto *dev_ctx = static_cast<platform::CUDADeviceContext *>(
platform::DeviceContextPool::Instance().Get(gpu_->place()));
@@ -222,6 +221,7 @@ class Vector {
paddle::memory::Copy(platform::CPUPlace(), dst, CUDAPlace().get(), src,
gpu_memory_size_, stream);
dev_ctx->Wait();
#endif
}

void MutableCPU() {
@@ -261,6 +261,7 @@
}

void CopyCPUDataToCUDA(const platform::Place &place) const {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
Shixiaowei02 (Contributor) commented on Dec 23, 2021:

Suggestion: throw an exception in an #elif branch.
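A minimal sketch of what that suggestion could look like (illustrative only, not part of this PR; it reuses the PADDLE_THROW and platform::errors::Unavailable utilities already visible elsewhere in this diff):

void CopyCPUDataToCUDA(const platform::Place &place) const {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  // ... existing CPU-to-GPU copy logic ...
#else
  // Hypothetical branch: fail loudly instead of silently doing nothing
  // when Paddle is built without CUDA/HIP support.
  PADDLE_THROW(platform::errors::Unavailable(
      "CopyCPUDataToCUDA is not supported in CPU-only builds."));
#endif
}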

void *src = cpu_.data();
gpu_memory_size_ = cpu_.size() * sizeof(T);
gpu_ = memory::Alloc(place, gpu_memory_size_);
Expand All @@ -270,6 +271,7 @@ class Vector {
auto stream = dev_ctx->stream();
paddle::memory::Copy(CUDAPlace().get(), dst, platform::CPUPlace(), src,
gpu_memory_size_, stream);
#endif
}

void ImmutableCPU() const {
@@ -465,81 +467,5 @@ class Vector {
mutable details::COWPtr<VectorData> m_;
};

#else // PADDLE_WITH_CUDA

template <typename T>
class CPUVector : public std::vector<T, std::allocator<T>> {
public:
CPUVector() : std::vector<T>() {}
CPUVector(size_t count, const T &value = T()) // NOLINT
: std::vector<T>(count, value) {}
CPUVector(std::initializer_list<T> init) : std::vector<T>(init) {}
CPUVector(const std::vector<T> &other) : std::vector<T>(other) {} // NOLINT
CPUVector(const CPUVector<T> &other) : std::vector<T>(other) {}
CPUVector(CPUVector<T> &&other) : std::vector<T>(std::move(other)) {}
CPUVector(std::vector<T> &&other) // NOLINT
: std::vector<T>(std::move(other)) {}
CPUVector &operator=(const CPUVector &other) {
this->assign(other.begin(), other.end());
return *this;
}
CPUVector &operator=(const std::vector<T> &other) {
this->assign(other.begin(), other.end());
return *this;
}

friend std::ostream &operator<<(std::ostream &os, const CPUVector<T> &other) {
std::stringstream ss;
for (auto v : other) {
os << v << " ";
}
return os;
}

T &operator[](size_t id) { return this->at(id); }

const T &operator[](size_t id) const { return this->at(id); }

template <typename D>
void Extend(const D &begin, const D &end) {
this->reserve(this->size() + size_t(end - begin));
this->insert(this->end(), begin, end);
}

const T *CUDAData(platform::Place place) const {
PADDLE_THROW(platform::errors::Unavailable(
"Vector::CUDAData() method is not supported in CPU-only version."));
}

T *CUDAMutableData(platform::Place place) {
PADDLE_THROW(platform::errors::Unavailable(
"Vector::CUDAMutableData() method is not supported in CPU-only "
"version."));
}

const T *Data(platform::Place place) const {
PADDLE_ENFORCE_EQ(
platform::is_cpu_place(place), true,
platform::errors::Unavailable(
"Vector::Data() method is not supported when not in CPUPlace."));
return this->data();
}

T *MutableData(platform::Place place) {
PADDLE_ENFORCE_EQ(
platform::is_cpu_place(place), true,
platform::errors::Unavailable("Vector::MutableData() method is not "
"supported when not in CPUPlace."));
return this->data();
}

const void *Handle() const { return static_cast<const void *>(this); }
};

template <typename T>
using Vector = CPUVector<T>;

#endif // PADDLE_WITH_CUDA

}; // namespace framework
} // namespace paddle
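With this change, framework::Vector<T> is defined in every build: the CUDA/HIP-specific paths are compiled out inside its methods instead of being swapped for a separate CPUVector type. A hedged usage sketch, assuming the std::vector-style constructors and the Data()/MutableData() accessors that the removed CPUVector mirrored (the function name is illustrative):

#include "paddle/fluid/framework/mixed_vector.h"

void VectorUsageExample() {
  // Behaves like std::vector on the host.
  paddle::framework::Vector<size_t> vec(4, 0);
  vec[0] = 1;
  // Data() hands out memory for the given Place; any device sync
  // happens implicitly, and the GPU paths vanish in CPU-only builds.
  const size_t *host_ptr = vec.Data(paddle::platform::CPUPlace());
  (void)host_ptr;  // silence unused-variable warnings in this sketch
}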
6 changes: 1 addition & 5 deletions paddle/fluid/operators/filter_by_instag_op.h
@@ -31,13 +31,9 @@ namespace operators {
using Tensor = framework::Tensor;
using SelectedRows = framework::SelectedRows;
using LoDTensor = framework::LoDTensor;
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)

template <typename T>
using Vector = framework::Vector<T>;
#else
template <typename T>
using Vector = framework::CPUVector<T>;
#endif

template <typename T>
class FilterByInstagKernel : public framework::OpKernel<T> {
6 changes: 1 addition & 5 deletions paddle/fluid/operators/shuffle_batch_op.h
@@ -33,13 +33,9 @@ namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)

template <typename T>
using Vector = framework::Vector<T>;
#else
template <typename T>
using Vector = framework::CPUVector<T>;
#endif

template <typename T>
class ShuffleBatchKernel : public framework::OpKernel<T> {
6 changes: 5 additions & 1 deletion paddle/pten/core/CMakeLists.txt
@@ -8,7 +8,11 @@ endif()

cc_library(kernel_factory SRCS kernel_factory.cc DEPS enforce convert_utils)
cc_library(kernel_context SRCS kernel_context.cc DEPS enforce pten_context)

cc_library(tensor_base SRCS tensor_base.cc allocator.cc storage.cc DEPS enforce)
cc_library(tensor_meta SRCS tensor_meta.cc DEPS enforce)

if(WITH_MKLDNN)
add_dependencies(tensor_meta mkldnn)
endif()

cc_library(dense_tensor SRCS dense_tensor.cc DEPS tensor_meta tensor_base)
4 changes: 1 addition & 3 deletions paddle/pten/core/dense_tensor.h
@@ -83,9 +83,7 @@ class DenseTensor : public TensorBase,

/// \brief Returns the lod of the tensor.
/// \return The lod of the tensor.
const std::vector<std::vector<size_t>>& lod() const noexcept {
return meta_.lod;
}
const LoD& lod() const noexcept { return meta_.lod; }

/// \brief Returns the data type of the tensor.
/// \return The data type of the tensor.
2 changes: 1 addition & 1 deletion paddle/pten/core/tensor_meta.cc
@@ -27,7 +27,7 @@ DenseTensorMeta::DenseTensorMeta(DataType dtype,
DenseTensorMeta::DenseTensorMeta(DataType dtype,
const DDim& dims,
DataLayout layout,
const std::vector<std::vector<size_t>>& lod)
const LoD& lod)
: dims(dims), dtype(dtype), layout(layout), lod(lod) {}

bool DenseTensorMeta::valid() const noexcept {
9 changes: 5 additions & 4 deletions paddle/pten/core/tensor_meta.h
@@ -22,15 +22,16 @@ limitations under the License. */

// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/framework/ddim.h"

// Note: mixed_vector includes many headers now. Will LoD be
// used on CUDA devices? Can we use small_vector here?
// #include "paddle/fluid/framework/mixed_vector.h"
// @zhanlve: Rollback to original LoD for now
#include "paddle/fluid/framework/mixed_vector.h"

namespace pten {

using DDim = paddle::framework::DDim;
using LoD = std::vector<std::vector<size_t>>;

using LoD = std::vector<paddle::framework::Vector<size_t>>;
/// \brief The meta data of dense tensor. Take the structure type
/// and use all default operations.
///
@@ -44,7 +45,7 @@ struct DenseTensorMeta {
DenseTensorMeta(DataType dtype,
const DDim& dims,
DataLayout layout,
const std::vector<std::vector<size_t>>& lod);
const LoD& lod);

/// \brief Test whether the metadata is valid. Does not throw exceptions.
/// \return Whether the metadata is valid.
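Because each LoD level is now a framework::Vector<size_t> rather than a std::vector<size_t>, a level can also expose device memory in CUDA/HIP builds. A brief hedged sketch of the implication (host access stays unchanged; the CUDAData accessor is only meaningful on GPU builds):

pten::LoD lod{{0, 2, 5}};  // one level holding offsets 0, 2, 5
// Host access works as before:
size_t first_offset = lod[0][0];
// On a CUDA build, lod[0].CUDAData(gpu_place) could return a
// device-side pointer to the same offsets (hypothetical usage).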
5 changes: 3 additions & 2 deletions paddle/pten/tests/api/test_tensor_utils.cc
@@ -15,6 +15,7 @@ limitations under the License. */
#include "gtest/gtest.h"

#include "paddle/pten/api/lib/utils/tensor_utils.h"
#include "paddle/pten/core/tensor_meta.h"

namespace paddle {
namespace tests {
@@ -30,7 +31,7 @@ TEST(tensor_utils, dense_tensor_to_lod_tensor) {
const DDim dims({2, 1});
const DataType dtype{DataType::FLOAT32};
const DataLayout layout{DataLayout::NCHW};
const std::vector<std::vector<size_t>> lod{{0, 2}};
const pten::LoD lod{{0, 2}};
DenseTensorMeta meta(dtype, dims, layout, lod);

auto alloc =
@@ -46,7 +47,7 @@

CHECK(dense_tensor.lod().size() == lod_tensor.lod().size());
CHECK(dense_tensor.lod()[0] ==
static_cast<std::vector<size_t>>((lod_tensor.lod()[0])));
static_cast<paddle::framework::Vector<size_t>>((lod_tensor.lod()[0])));
CHECK(dense_tensor.dtype() == pten::TransToPtenDataType(lod_tensor.type()));
CHECK(dense_tensor.layout() ==
pten::TransToPtenDataLayout(lod_tensor.layout()));
8 changes: 4 additions & 4 deletions paddle/pten/tests/core/test_dense_tensor.cc
@@ -25,7 +25,7 @@ TEST(dense_tensor, meta) {
const DataType dtype{DataType::INT8};
const DataLayout layout{DataLayout::NHWC};
// TODO(Shixiaowei02): need to check the lod is valid.
const std::vector<std::vector<size_t>> lod{};
const LoD lod{};

DenseTensorMeta meta_0;
CHECK(!meta_0.valid());
@@ -72,7 +72,7 @@ TEST(dense_tensor, ctor) {
const DDim dims({1, 2});
const DataType dtype{DataType::INT8};
const DataLayout layout{DataLayout::NHWC};
const std::vector<std::vector<size_t>> lod{};
const LoD lod{};
DenseTensorMeta meta(dtype, dims, layout, lod);

auto alloc = std::make_shared<FancyAllocator>();
@@ -106,7 +106,7 @@ TEST(dense_tensor, resize) {
const DDim dims({1, 2});
const DataType dtype{DataType::INT8};
const DataLayout layout{DataLayout::NHWC};
const std::vector<std::vector<size_t>> lod{};
const LoD lod{};
DenseTensorMeta meta(dtype, dims, layout, lod);

auto alloc = std::make_shared<FancyAllocator>();
@@ -126,7 +126,7 @@ TEST(dense_tensor, shallow_copy) {
const DDim dims({1, 2});
const DataType dtype{DataType::INT8};
const DataLayout layout{DataLayout::NHWC};
const std::vector<std::vector<size_t>> lod{};
const LoD lod{};
DenseTensorMeta meta(dtype, dims, layout, lod);

auto alloc = std::make_shared<FancyAllocator>();