Skip to content

Commit 0f23510

Browse files
committed
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into fix_ellipsis
2 parents 166ced5 + 895692e commit 0f23510

File tree

19 files changed

+684
-135
lines changed

19 files changed

+684
-135
lines changed

paddle/fluid/framework/operator.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1883,6 +1883,10 @@ void OperatorWithKernel::BuildPtenKernelContext(
18831883
pt_kernel_context_->EmplaceBackAttr(BOOST_GET_CONST(float, attr));
18841884
} else if (attr_defs[i].type_index == std::type_index(typeid(bool))) {
18851885
pt_kernel_context_->EmplaceBackAttr(BOOST_GET_CONST(bool, attr));
1886+
} else if (attr_defs[i].type_index ==
1887+
std::type_index(typeid(std::vector<int>))) {
1888+
pt_kernel_context_->EmplaceBackAttr(
1889+
BOOST_GET_CONST(std::vector<int>, attr));
18861890
} else {
18871891
PADDLE_THROW(platform::errors::Unimplemented(
18881892
"unsupported cast op attribute `%s` when construct "

paddle/fluid/imperative/prepared_operator.cc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,9 @@ static void BuildDygraphPtenKernelContext(
372372
kernel_ctx->EmplaceBackAttr(BOOST_GET_CONST(float, attr));
373373
} else if (attr_defs[i].type_index == std::type_index(typeid(bool))) {
374374
kernel_ctx->EmplaceBackAttr(BOOST_GET_CONST(bool, attr));
375+
} else if (attr_defs[i].type_index ==
376+
std::type_index(typeid(std::vector<int>))) {
377+
kernel_ctx->EmplaceBackAttr(BOOST_GET_CONST(std::vector<int>, attr));
375378
} else {
376379
PADDLE_THROW(platform::errors::Unimplemented(
377380
"unsupported cast op attribute `%s` when construct "

paddle/fluid/operators/reshape_op.cc

Lines changed: 122 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,12 @@ limitations under the License. */
1515
#include <string>
1616

1717
#include "paddle/fluid/framework/op_registry.h"
18+
#include "paddle/fluid/framework/pten_utils.h"
1819

20+
// can only include headers from the paddle/pten/api dirs
21+
#include "paddle/pten/api/lib/utils/tensor_utils.h"
22+
#include "paddle/pten/include/core.h"
23+
#include "paddle/pten/include/manipulation.h"
1924
namespace paddle {
2025
namespace framework {
2126
class InferShapeContext;
@@ -248,13 +253,6 @@ class ReshapeOp : public framework::OperatorWithKernel {
248253
auto input_data_type =
249254
framework::OperatorWithKernel::IndicateVarDataType(ctx, "X");
250255

251-
//#ifdef PADDLE_WITH_MKLDNN
252-
// if (this->CanMKLDNNBeUsed(ctx, input_data_type)) {
253-
// return framework::OpKernelType(input_data_type, ctx.GetPlace(),
254-
// framework::DataLayout::kMKLDNN,
255-
// framework::LibraryType::kMKLDNN);
256-
// }
257-
//#endif
258256
return framework::OpKernelType(input_data_type, ctx.GetPlace());
259257
}
260258

@@ -366,13 +364,6 @@ class ReshapeGradOp : public framework::OperatorWithKernel {
366364
auto input_data_type =
367365
framework::OperatorWithKernel::IndicateVarDataType(ctx, "X");
368366

369-
//#ifdef PADDLE_WITH_MKLDNN
370-
// if (this->CanMKLDNNBeUsed(ctx, input_data_type)) {
371-
// return framework::OpKernelType(input_data_type, ctx.GetPlace(),
372-
// framework::DataLayout::kMKLDNN,
373-
// framework::LibraryType::kMKLDNN);
374-
// }
375-
//#endif
376367
return framework::OpKernelType(input_data_type, ctx.GetPlace());
377368
}
378369
};
@@ -382,42 +373,117 @@ class ReshapeKernel {
382373
void operator()(const framework::ExecutionContext &ctx) const {
383374
auto *out = ctx.Output<framework::LoDTensor>("Out");
384375
auto *in = ctx.Input<framework::LoDTensor>("X");
385-
386-
framework::DDim out_dims = out->dims();
376+
// framework::DDim out_dims = out->dims();
377+
auto pt_x = paddle::experimental::MakePtenDenseTensor(*in);
378+
379+
// We can't call MakePtenDenseTensor on out, because reshape will reallocate
380+
// memory and that throws an error (can't realloc shared memory) in the current
381+
// DenseTensor
382+
// design. So the code below creates a temporary DenseTensor for the output.
383+
// TODO(YuanRisheng) we can use MakePtenDenseTensor after #36916 merge.
384+
const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
385+
paddle::platform::CPUPlace());
386+
pten::DenseTensorMeta meta{pten::TransToPtenDataType(in->type()),
387+
in->dims(),
388+
pten::TransToPtenDataLayout(in->layout())};
389+
auto pt_out_tmp =
390+
std::make_shared<pten::DenseTensor>(alloc, std::move(meta));
391+
pten::DenseTensor *pt_out = nullptr;
392+
if (in == out) {
393+
pt_out = pt_x.get();
394+
} else {
395+
pt_out = pt_out_tmp.get();
396+
}
387397

388398
auto list_new_shape_tensor =
389399
ctx.MultiInput<framework::Tensor>("ShapeTensor");
400+
auto *shape_tensor = ctx.HasInput("Shape")
401+
? ctx.Input<framework::LoDTensor>("Shape")
402+
: nullptr;
390403
if (list_new_shape_tensor.size() > 0) {
391404
// have shape tensor
392-
auto new_shape = get_new_shape(list_new_shape_tensor);
393-
out_dims = ReshapeOp::ValidateShape(new_shape, in->dims());
405+
std::vector<pten::DenseTensor> pt_vec_shape;
406+
for (auto &tensor : list_new_shape_tensor) {
407+
if (platform::is_gpu_place(tensor->place()) ||
408+
platform::is_xpu_place(tensor->place())) {
409+
framework::Tensor temp;
410+
TensorCopySync(*tensor, platform::CPUPlace(), &temp);
411+
pt_vec_shape.push_back(
412+
std::move(*(paddle::experimental::MakePtenDenseTensor(temp))));
413+
} else {
414+
pt_vec_shape.push_back(
415+
std::move(*(paddle::experimental::MakePtenDenseTensor(*tensor))));
416+
}
417+
}
418+
if (platform::is_cpu_place(ctx.GetPlace())) {
419+
auto &dev_ctx = ctx.device_context<platform::CPUDeviceContext>();
420+
pten::ReshapeFromVectorDT(dev_ctx, *pt_x.get(), pt_vec_shape, pt_out);
421+
}
422+
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
423+
if (platform::is_gpu_place(ctx.GetPlace())) {
424+
auto &dev_ctx = ctx.device_context<platform::CUDADeviceContext>();
425+
pten::ReshapeFromVectorDT(dev_ctx, *pt_x.get(), pt_vec_shape, pt_out);
426+
}
427+
#endif
428+
#ifdef PADDLE_WITH_XPU
429+
if (platform::is_xpu_place(ctx.GetPlace())) {
430+
auto &dev_ctx = ctx.device_context<platform::XPUDeviceContext>();
431+
pten::ReshapeFromVectorDT(dev_ctx, *pt_x.get(), pt_vec_shape, pt_out);
432+
}
433+
#endif
434+
} else if (shape_tensor) {
435+
std::unique_ptr<pten::DenseTensor> pt_shape;
436+
if (platform::is_gpu_place(shape_tensor->place()) ||
437+
platform::is_xpu_place(shape_tensor->place())) {
438+
framework::Tensor temp;
439+
TensorCopySync(*shape_tensor, platform::CPUPlace(), &temp);
440+
pt_shape = paddle::experimental::MakePtenDenseTensor(temp);
441+
} else {
442+
pt_shape = paddle::experimental::MakePtenDenseTensor(*shape_tensor);
443+
}
394444

445+
if (platform::is_cpu_place(ctx.GetPlace())) {
446+
auto &dev_ctx = ctx.device_context<platform::CPUDeviceContext>();
447+
pten::ReshapeFromDT(dev_ctx, *pt_x.get(), *pt_shape.get(), pt_out);
448+
}
449+
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
450+
if (platform::is_gpu_place(ctx.GetPlace())) {
451+
auto &dev_ctx = ctx.device_context<platform::CUDADeviceContext>();
452+
pten::ReshapeFromDT(dev_ctx, *pt_x.get(), *pt_shape.get(), pt_out);
453+
}
454+
#endif
455+
#ifdef PADDLE_WITH_XPU
456+
if (platform::is_xpu_place(ctx.GetPlace())) {
457+
auto &dev_ctx = ctx.device_context<platform::XPUDeviceContext>();
458+
pten::ReshapeFromDT(dev_ctx, *pt_x.get(), *pt_shape.get(), pt_out);
459+
}
460+
#endif
395461
} else {
396-
auto *shape_tensor = ctx.HasInput("Shape")
397-
? ctx.Input<framework::LoDTensor>("Shape")
398-
: nullptr;
399-
400-
if (shape_tensor) {
401-
auto *shape_data = shape_tensor->data<int>();
402-
framework::Tensor cpu_shape_tensor;
403-
if (platform::is_gpu_place(shape_tensor->place()) ||
404-
platform::is_xpu_place(shape_tensor->place())) {
405-
TensorCopySync(*shape_tensor, platform::CPUPlace(),
406-
&cpu_shape_tensor);
407-
shape_data = cpu_shape_tensor.data<int>();
408-
}
409-
auto shape =
410-
std::vector<int>(shape_data, shape_data + shape_tensor->numel());
411-
out_dims = ReshapeOp::ValidateShape(shape, in->dims());
462+
auto &shape_vec = ctx.Attr<std::vector<int>>("shape");
463+
if (platform::is_cpu_place(ctx.GetPlace())) {
464+
auto &dev_ctx = ctx.device_context<platform::CPUDeviceContext>();
465+
pten::ReshapeFromVectorVal(dev_ctx, *pt_x.get(), shape_vec, pt_out);
466+
}
467+
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
468+
if (platform::is_gpu_place(ctx.GetPlace())) {
469+
auto &dev_ctx = ctx.device_context<platform::CUDADeviceContext>();
470+
pten::ReshapeFromVectorVal(dev_ctx, *pt_x.get(), shape_vec, pt_out);
412471
}
472+
#endif
473+
#ifdef PADDLE_WITH_XPU
474+
if (platform::is_xpu_place(ctx.GetPlace())) {
475+
auto &dev_ctx = ctx.device_context<platform::XPUDeviceContext>();
476+
pten::ReshapeFromVectorVal(dev_ctx, *pt_x.get(), shape_vec, pt_out);
477+
}
478+
#endif
479+
}
480+
// Non-inplace: the whole result must be moved from pt_out to out. Inplace:
481+
// only the result dims need to be set.
482+
if (in != out) {
483+
paddle::experimental::MovesStorage(pt_out, static_cast<Tensor *>(out));
484+
} else {
485+
out->Resize(pt_out->dims());
413486
}
414-
415-
out->Resize(out_dims);
416-
out->mutable_data(ctx.GetPlace(), in->type());
417-
framework::TensorCopy(
418-
*in, ctx.GetPlace(),
419-
ctx.template device_context<platform::DeviceContext>(), out);
420-
out->Resize(out_dims);
421487
}
422488
};
423489

@@ -479,6 +545,21 @@ class Reshape2Op : public ReshapeOp {
479545

480546
ReshapeOp::InferShape(ctx);
481547
}
548+
549+
// Picks the pten kernel signature for this op according to where the target
// shape comes from, checked in this order:
//   1. a non-empty "ShapeTensor" multi-input -> "reshape2.mulhost.mid"
//   2. a "Shape" input is present            -> "reshape2.host.mid"
//   3. otherwise the "shape" attribute       -> "reshape2.mid"
// Every variant lists {"XShape", "Out"} as its last argument.
// NOTE(review): the KernelSignature arguments are presumably
// (name, inputs, attrs, outputs) -- confirm against its declaration.
framework::KernelSignature GetExpectedPtenKernelArgs(
550+
const framework::ExecutionContext &ctx) const override {
551+
auto multi_inputs = ctx.MultiInput<framework::Tensor>("ShapeTensor");
552+
if (multi_inputs.size() > 0) {
553+
return framework::KernelSignature(
554+
"reshape2.mulhost.mid", {"X", "ShapeTensor"}, {}, {"XShape", "Out"});
555+
} else if (ctx.HasInput("Shape")) {
556+
return framework::KernelSignature("reshape2.host.mid", {"X", "Shape"}, {},
557+
{"XShape", "Out"});
558+
} else {
559+
// Neither shape input is provided: the only variant that passes "shape" in
560+
// the third argument list.
return framework::KernelSignature("reshape2.mid", {"X"}, {"shape"},
560+
{"XShape", "Out"});
561+
}
562+
}
482563
};
483564

484565
class Reshape2OpMaker : public ReshapeOpMaker {
@@ -557,13 +638,6 @@ class Reshape2GradOp : public framework::OperatorWithKernel {
557638
auto input_data_type = framework::OperatorWithKernel::IndicateVarDataType(
558639
ctx, framework::GradVarName("Out"));
559640

560-
//#ifdef PADDLE_WITH_MKLDNN
561-
// if (this->CanMKLDNNBeUsed(ctx, input_data_type)) {
562-
// return framework::OpKernelType(input_data_type, ctx.GetPlace(),
563-
// framework::DataLayout::kMKLDNN,
564-
// framework::LibraryType::kMKLDNN);
565-
// }
566-
//#endif
567641
return framework::OpKernelType(input_data_type, ctx.GetPlace());
568642
}
569643

paddle/pten/core/kernel_registry.h

Lines changed: 27 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -114,34 +114,16 @@ struct KernelRegistrar {
114114
KernelArgsParseFn args_parse_fn,
115115
KernelArgsDefFn args_def_fn,
116116
KernelFn kernel_fn) {
117-
if (layout == DataLayout::ANY) {
118-
for (size_t layout_iter = static_cast<size_t>(DataLayout::NHWC);
119-
layout_iter != static_cast<size_t>(DataLayout::NUM_DATA_LAYOUTS);
120-
layout_iter++) {
121-
for (size_t dtype = static_cast<size_t>(DataType::BOOL);
122-
dtype != static_cast<size_t>(DataType::NUM_DATA_TYPES);
123-
dtype++) {
124-
ConstructKernel(kernel_name_cstr,
125-
backend,
126-
static_cast<DataLayout>(layout_iter),
127-
static_cast<DataType>(dtype),
128-
args_parse_fn,
129-
args_def_fn,
130-
kernel_fn);
131-
}
132-
}
133-
} else {
134-
for (size_t dtype = static_cast<size_t>(DataType::BOOL);
135-
dtype != static_cast<size_t>(DataType::NUM_DATA_TYPES);
136-
dtype++) {
137-
ConstructKernel(kernel_name_cstr,
138-
backend,
139-
layout,
140-
static_cast<DataType>(dtype),
141-
args_parse_fn,
142-
args_def_fn,
143-
kernel_fn);
144-
}
117+
for (size_t dtype = static_cast<size_t>(DataType::BOOL);
118+
dtype != static_cast<size_t>(DataType::NUM_DATA_TYPES);
119+
dtype++) {
120+
ConstructKernel(kernel_name_cstr,
121+
backend,
122+
layout,
123+
static_cast<DataType>(dtype),
124+
args_parse_fn,
125+
args_def_fn,
126+
kernel_fn);
145127
}
146128
}
147129

@@ -158,7 +140,6 @@ struct KernelRegistrar {
158140
Kernel kernel(kernel_fn);
159141
args_parse_fn(kernel_key, kernel.mutable_args_def());
160142
args_def_fn(&kernel);
161-
162143
KernelFactory::Instance().InsertCompatibleOpType(kernel_name.name());
163144
KernelFactory::Instance().kernels()[kernel_name][kernel_key] = kernel;
164145
}
@@ -838,21 +819,22 @@ struct KernelRegistrar {
838819
_PT_REGISTER_KERNEL_WITH_NO_TYPE( \
839820
kernel_name, PT_ID, backend, layout, meta_kernel_fn)
840821

841-
#define _PT_REGISTER_KERNEL_WITH_NO_TYPE( \
842-
kernel_name, func_id, backend, layout, meta_kernel_fn) \
843-
PT_STATIC_ASSERT_GLOBAL_NAMESPACE( \
844-
PT_CONCATENATE(pt_op_kernel_ns_check_, func_id), \
845-
"PT_REGISTER_KERNEL must be called in global namespace."); \
846-
decltype(meta_kernel_fn) meta_kernel_fn; \
847-
static void PT_CONCATENATE(__PT_KERNEL_args_def_FN_, \
848-
func_id)(::pten::Kernel*); \
849-
static const ::pten::KernelRegistrar __reg_pt_op_kernel_##func_id( \
850-
kernel_name, \
851-
BACKEND(backend), \
852-
DATALAYOUT(layout), \
853-
::pten::KernelArgsParseFunctor<decltype(&meta_kernel_fn)>::Parse, \
854-
&PT_CONCATENATE(__PT_KERNEL_args_def_FN_, func_id), \
855-
PT_KERNEL(meta_kernel_fn)); \
856-
void PT_CONCATENATE(__PT_KERNEL_args_def_FN_, \
822+
#define _PT_REGISTER_KERNEL_WITH_NO_TYPE( \
823+
kernel_name, func_id, backend, layout, meta_kernel_fn) \
824+
PT_STATIC_ASSERT_GLOBAL_NAMESPACE( \
825+
PT_CONCATENATE(pt_op_kernel_ns_check_, func_id), \
826+
"PT_REGISTER_KERNEL must be called in global namespace."); \
827+
decltype(meta_kernel_fn) meta_kernel_fn; \
828+
static void PT_CONCATENATE(__PT_KERNEL_args_def_FN_, \
829+
func_id)(::pten::Kernel*); \
830+
static const ::pten::KernelRegistrar PT_CONCATENATE(__reg_pt_op_kernel_, \
831+
func_id)( \
832+
kernel_name, \
833+
BACKEND(backend), \
834+
DATALAYOUT(layout), \
835+
::pten::KernelArgsParseFunctor<decltype(&meta_kernel_fn)>::Parse, \
836+
&PT_CONCATENATE(__PT_KERNEL_args_def_FN_, func_id), \
837+
PT_KERNEL(meta_kernel_fn)); \
838+
void PT_CONCATENATE(__PT_KERNEL_args_def_FN_, \
857839
func_id)(::pten::Kernel * kernel)
858840
} // namespace pten

paddle/pten/core/kernel_utils.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,7 @@ struct KernelImpl<Return (*)(Args...), kernel_fn> {
208208
PT_SPECIALIZE_KernelCallHelper_FOR_ATTRIBUTE(paddle::platform::float16);
209209
PT_SPECIALIZE_KernelCallHelper_FOR_ATTRIBUTE(const Scalar&);
210210
PT_SPECIALIZE_KernelCallHelper_FOR_ATTRIBUTE(const std::vector<int64_t>&);
211+
PT_SPECIALIZE_KernelCallHelper_FOR_ATTRIBUTE(const std::vector<int>&);
211212

212213
/* Output Helpers */
213214

paddle/pten/include/manipulation.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,4 +37,17 @@ DenseTensor Flatten(const ContextT& dev_ctx,
3737
return dense_out;
3838
}
3939

40+
// Returns a new DenseTensor holding `x` reshaped to `shape`.
//
// Steps, as written below:
//   - the output meta is computed from x.meta() and `shape` by
//     InferShapeFromVecValue;
//   - the output tensor is constructed from that meta and a DefaultAllocator
//     created for dev_ctx.GetPlace();
//   - ReshapeFromVectorVal is called with a pointer to that tensor, which is
//     then returned by value.
//
// NOTE(review): template parameter T is not referenced in this body --
// presumably kept for signature parity with the sibling helpers; confirm.
template <typename T, typename ContextT>
41+
DenseTensor Reshape(const ContextT& dev_ctx,
42+
const DenseTensor& x,
43+
const std::vector<int>& shape) {
44+
auto out_meta = InferShapeFromVecValue(x.meta(), shape);
45+
const auto allocator =
46+
std::make_shared<paddle::experimental::DefaultAllocator>(
47+
dev_ctx.GetPlace());
48+
pten::DenseTensor dense_out(allocator, out_meta);
49+
ReshapeFromVectorVal(dev_ctx, x, shape, &dense_out);
50+
return dense_out;
51+
}
52+
4053
} // namespace pten

0 commit comments

Comments
 (0)