Skip to content

Commit dfdc996

Browse files
authored
[Unify Tensors PR #3]Port framework::Tensor members & interfaces to pten::DenseTensor, test=allcases (PaddlePaddle#38473)
* Added shared_ptr<Allocation> member & corresponding interfaces to Storage * Removed original pten::Allocation from Storage and adjusted the interfaces accordingly * Fixed issues with storage offset * Used place to malloc allocation for TensorStorage * [Unify Tensors PR #3]Ported framework::Tensor interfaces to pten::DenseTensor * Fixed issues with place * Added comments * Moved mutable_data with stream argument to DenseTensor * Added set_offset interface * Fixed CI issues,test=allcases * [Unify Tensors PR #4] Port LoDTensor interfaces to DenseTensor * Reverted changes to pten_layout() interface * Removed friend classes
1 parent a7b13d3 commit dfdc996

File tree

11 files changed

+721
-25
lines changed

11 files changed

+721
-25
lines changed

paddle/fluid/framework/CMakeLists.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -68,15 +68,15 @@ cc_test(data_type_test SRCS data_type_test.cc DEPS data_type place tensor)
6868
if(WITH_GPU)
6969
if (WIN32)
7070
windows_symbolic(tensor_util SRCS tensor_util.cu)
71-
nv_library(tensor SRCS tensor.cc .tensor_util.cu DEPS place memory data_type device_context)
71+
nv_library(tensor SRCS tensor.cc .tensor_util.cu DEPS place memory data_type device_context dense_tensor)
7272
add_dependencies(tensor tensor_util)
7373
else()
74-
nv_library(tensor SRCS tensor.cc tensor_util.cu DEPS place memory data_type device_context profiler)
74+
nv_library(tensor SRCS tensor.cc tensor_util.cu DEPS place memory data_type device_context profiler dense_tensor)
7575
endif(WIN32)
7676
elseif(WITH_ROCM)
77-
hip_library(tensor SRCS tensor.cc tensor_util.cu DEPS place memory data_type device_context profiler)
77+
hip_library(tensor SRCS tensor.cc tensor_util.cu DEPS place memory data_type device_context profiler dense_tensor)
7878
else()
79-
cc_library(tensor SRCS tensor.cc tensor_util.cc DEPS place memory data_type device_context profiler)
79+
cc_library(tensor SRCS tensor.cc tensor_util.cc DEPS place memory data_type device_context profiler dense_tensor)
8080
endif()
8181

8282
cc_test(tensor_test SRCS tensor_test.cc DEPS tensor)

paddle/fluid/framework/tensor.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,8 @@ class Tensor {
120120
explicit Tensor(const proto::VarType::Type&);
121121

122122
/*! Return a pointer to mutable memory block. */
123+
const void* data() const;
124+
123125
template <typename T>
124126
T* data();
125127

paddle/fluid/framework/tensor_impl.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,12 @@ inline T* Tensor::data() {
5454
offset_);
5555
}
5656

57+
inline const void* Tensor::data() const {
58+
check_memory_size();
59+
return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
60+
offset_);
61+
}
62+
5763
template <typename T>
5864
inline T* Tensor::mutable_data(const DDim& dims, const platform::Place& place,
5965
size_t requested_size) {

paddle/fluid/framework/tensor_util.cc

Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,18 +23,22 @@ limitations under the License. */
2323
#include "paddle/fluid/framework/tensor_util.h"
2424
#include "paddle/fluid/platform/complex.h"
2525
#include "paddle/fluid/platform/profiler.h"
26+
27+
#include "paddle/pten/core/dense_tensor.h"
28+
2629
#ifdef PADDLE_WITH_MKLDNN
2730
#include "dnnl_debug.h" // NOLINT
2831
#endif
2932

3033
namespace paddle {
3134
namespace framework {
3235

33-
void TensorCopy(const Tensor& src, const platform::Place& dst_place,
34-
const platform::DeviceContext& ctx, Tensor* dst) {
36+
template <typename TENSOR>
37+
void TensorCopyImpl(const TENSOR& src, const platform::Place& dst_place,
38+
const platform::DeviceContext& ctx, TENSOR* dst) {
3539
if (&src == dst) {
3640
auto src_copy = src;
37-
TensorCopy(src_copy, dst_place, ctx, dst);
41+
TensorCopyImpl(src_copy, dst_place, ctx, dst);
3842
return;
3943
}
4044

@@ -45,7 +49,7 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
4549
dst->Resize(src.dims());
4650
dst->set_layout(src.layout());
4751
auto src_place = src.place();
48-
auto src_ptr = src.data<void>();
52+
auto src_ptr = src.data();
4953
#ifdef PADDLE_WITH_MKLDNN
5054
dst->set_format(src.format());
5155
// oneDNN tensors due to padding may be of bigger size
@@ -389,16 +393,34 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
389393
#endif
390394
}
391395

392-
void TensorCopy(const Tensor& src, const platform::Place& dst_place,
393-
Tensor* dst) {
396+
template <typename TENSOR>
397+
void TensorCopyImpl(const TENSOR& src, const platform::Place& dst_place,
398+
TENSOR* dst) {
394399
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
395400
const platform::DeviceContext* dev_ctx;
396401
if (platform::is_gpu_place(dst_place) || platform::is_npu_place(dst_place)) {
397402
dev_ctx = pool.Get(dst_place);
398403
} else {
399404
dev_ctx = pool.Get(src.place());
400405
}
401-
TensorCopy(src, dst_place, *dev_ctx, dst);
406+
TensorCopyImpl(src, dst_place, *dev_ctx, dst);
407+
}
408+
409+
void TensorCopy(const Tensor& src, const platform::Place& dst_place,
410+
Tensor* dst) {
411+
TensorCopyImpl<Tensor>(src, dst_place, dst);
412+
}
413+
void TensorCopy(const pten::DenseTensor& src, const platform::Place& dst_place,
414+
pten::DenseTensor* dst) {
415+
TensorCopyImpl<pten::DenseTensor>(src, dst_place, dst);
416+
}
417+
void TensorCopy(const Tensor& src, const platform::Place& dst_place,
418+
const platform::DeviceContext& ctx, Tensor* dst) {
419+
TensorCopyImpl<Tensor>(src, dst_place, ctx, dst);
420+
}
421+
void TensorCopy(const pten::DenseTensor& src, const platform::Place& dst_place,
422+
const platform::DeviceContext& ctx, pten::DenseTensor* dst) {
423+
TensorCopyImpl<pten::DenseTensor>(src, dst_place, ctx, dst);
402424
}
403425

404426
void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
@@ -418,7 +440,7 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
418440
dst->set_format(src.format());
419441
#endif
420442
auto src_place = src.place();
421-
auto src_ptr = src.data<void>();
443+
auto src_ptr = src.data();
422444
auto dst_ptr = dst->mutable_data(dst_place, src.type());
423445

424446
if (src_ptr == dst_ptr && src_place == dst_place) {
@@ -971,7 +993,7 @@ void TensorToStream(std::ostream& os, const Tensor& tensor,
971993
{ // the 3rd field, tensor data
972994
uint64_t size = tensor.numel() * framework::SizeOfType(tensor.type());
973995

974-
auto* data_ptr = tensor.data<void>();
996+
auto* data_ptr = tensor.data();
975997
PADDLE_ENFORCE_LT(size, (std::numeric_limits<std::streamsize>::max)(),
976998
platform::errors::ResourceExhausted(
977999
"tensor size %d overflow when writing tensor", size));

paddle/fluid/framework/tensor_util.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ limitations under the License. */
3434
#include "paddle/fluid/platform/device/mlu/device_context.h"
3535
#endif
3636

37+
#include "paddle/pten/core/dense_tensor.h"
38+
3739
namespace paddle {
3840
namespace framework {
3941

@@ -75,6 +77,8 @@ class Tensor;
7577

7678
void TensorCopy(const Tensor& src, const platform::Place& dst_place,
7779
const platform::DeviceContext& ctx, Tensor* dst);
80+
void TensorCopy(const pten::DenseTensor& src, const platform::Place& dst_place,
81+
const platform::DeviceContext& ctx, pten::DenseTensor* dst);
7882

7983
// NOTE(zcd): If the src.place() and dst_place are two different GPU,
8084
// the copy operation is carried out on the dst_place's stream. This is
@@ -85,6 +89,8 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
8589
// not completed.
8690
void TensorCopy(const Tensor& src, const platform::Place& dst_place,
8791
Tensor* dst);
92+
void TensorCopy(const pten::DenseTensor& src, const platform::Place& dst_place,
93+
pten::DenseTensor* dst);
8894

8995
void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
9096
Tensor* dst);

paddle/pten/core/CMakeLists.txt

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@ endif()
99
cc_library(kernel_factory SRCS kernel_factory.cc DEPS enforce convert_utils)
1010
cc_library(kernel_context SRCS kernel_context.cc DEPS enforce pten_context)
1111
cc_library(tensor_base SRCS tensor_base.cc allocator.cc storage.cc DEPS enforce)
12+
1213
cc_library(tensor_meta SRCS tensor_meta.cc DEPS enforce mixed_vector)
14+
cc_library(dense_tensor SRCS dense_tensor.cc DEPS convert_utils tensor_meta tensor_base)
1315

14-
cc_library(dense_tensor SRCS dense_tensor.cc DEPS tensor_meta tensor_base)
16+
# Will remove once we implemented MKLDNN_Tensor
17+
if(WITH_MKLDNN)
18+
add_dependencies(dense_tensor mkldnn)
19+
endif()

0 commit comments

Comments
 (0)