-
Notifications
You must be signed in to change notification settings - Fork 5.9k
[PTen] Add variable transform to/from ptenTensor and add cast kernel #36916
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 47 commits
0ee0d15
1ae3fe2
dc81e06
7cd7966
a244a0f
0eaf913
dc9b42a
83415da
62cfb21
b6c3c05
17913da
6fbd94d
70d4069
c434610
a5c234e
821b6e0
4538134
bbc83c4
1feb022
6d55883
7eea4dd
51dc272
a927e6f
f893438
9ba67db
d8ce4c3
dce29b1
b567179
4b70d76
38a4e46
dedd03e
ca9a284
620960b
f4d6cc6
acd5649
a12a3a1
e22707e
6adacbe
9276daa
4427386
dcaa367
0f6dd13
a07cda0
90b05d9
b71b964
8633603
3410498
6199455
c469ffd
6d712d0
539ce20
30e057a
8ca880b
55f7cb6
d8873ff
7860e78
9df1dc4
37a805d
9a49c51
9dbfbee
2a27ce3
68aa002
a112c6d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1183,6 +1183,9 @@ void OperatorWithKernel::RunImpl(const Scope& scope, | |
| } | ||
| BuildPtenKernelContext(*runtime_ctx, dev_ctx); | ||
| (*pt_kernel_)(pt_kernel_context_.get()); | ||
|
|
||
| WriteBackToOutputs(runtime_ctx); | ||
|
|
||
| pt_kernel_context_->ClearData(); | ||
| } else { | ||
| (*kernel_func_)( | ||
|
|
@@ -1808,50 +1811,78 @@ void OperatorWithKernel::BuildPtenKernelContext( | |
| for (size_t i = 0; i < input_names.size(); ++i) { | ||
| auto& in_def = input_defs.at(i); | ||
| auto& ins_vector = ctx.inputs.at(input_names[i]); | ||
| if (pt_kernel_context_->InputsSize() <= i) { | ||
|
|
||
| size_t start_idx = | ||
| (i == 0 ? 0 : pt_kernel_context_->InputRangeAt(i - 1).second); | ||
| size_t end_idx = start_idx + ins_vector.size(); | ||
|
|
||
| if (pt_kernel_context_->InputsSize() == start_idx) { | ||
| paddle::SmallVector<std::shared_ptr<pten::TensorBase>> tmp_inputs; | ||
| for (auto* var : ins_vector) { | ||
| tmp_inputs.emplace_back( | ||
| experimental::MakePtenTensorBaseFromVar(*var, in_def)); | ||
| } | ||
| pt_kernel_context_->EmplaceBackInputs(std::move(tmp_inputs)); | ||
| } else { | ||
| } else if (pt_kernel_context_->InputsSize() > start_idx) { | ||
| size_t input_size = pt_kernel_context_->InputsSize(); | ||
| for (size_t j = 0; j < ins_vector.size(); ++j) { | ||
| if (input_size > i + j) { | ||
| if (input_size > start_idx + j) { | ||
| experimental::ReMakePtenDenseTensorFromVar( | ||
| *ins_vector[j], in_def, | ||
| pt_kernel_context_->MutableInputAt<pten::DenseTensor>(i + j)); | ||
| pt_kernel_context_->MutableInputAt<pten::DenseTensor>(start_idx + | ||
| j)); | ||
| } else { | ||
| pt_kernel_context_->EmplaceBackInputWithoutSetRange( | ||
| experimental::MakePtenTensorBaseFromVar(*ins_vector[j], in_def)); | ||
| } | ||
| // TODO(chenweihang): adapt multi-input case later | ||
| } | ||
| pt_kernel_context_->MutableInputRangeAt(i) = | ||
| std::make_pair(i, i + ins_vector.size()); | ||
| std::make_pair(start_idx, end_idx); | ||
| } else { | ||
| PADDLE_THROW(platform::errors::PreconditionNotMet( | ||
| "error start index when trying to set new tensor to inputs, start " | ||
| "index is `%d`, but current pt_kernel_context_.inputs.size() is " | ||
| "`%d` ", | ||
|
||
| start_idx, pt_kernel_context_->InputsSize())); | ||
| } | ||
| } | ||
|
|
||
| for (size_t i = 0; i < output_names.size(); ++i) { | ||
| auto& out_def = output_defs.at(i); | ||
| auto& outs_vector = ctx.outputs.at(output_names[i]); | ||
| if (pt_kernel_context_->OutputsSize() <= i) { | ||
|
|
||
| size_t start_idx = | ||
| (i == 0 ? 0 : pt_kernel_context_->OutputRangeAt(i - 1).second); | ||
| size_t end_idx = start_idx + outs_vector.size(); | ||
|
|
||
| if (pt_kernel_context_->OutputsSize() == start_idx) { | ||
| paddle::SmallVector<std::shared_ptr<pten::TensorBase>> tmp_outputs; | ||
| for (auto* var : outs_vector) { | ||
| tmp_outputs.emplace_back( | ||
| experimental::MakePtenTensorBaseFromVar(var, out_def)); | ||
| } | ||
| pt_kernel_context_->EmplaceBackOutputs(std::move(tmp_outputs)); | ||
| } else { | ||
| } else if (pt_kernel_context_->OutputsSize() > start_idx) { | ||
| size_t output_size = pt_kernel_context_->OutputsSize(); | ||
| for (size_t j = 0; j < outs_vector.size(); ++j) { | ||
| if (output_size > i + j) { | ||
| if (output_size > start_idx + j) { | ||
| experimental::ReMakePtenDenseTensorFromVar( | ||
| outs_vector[j], out_def, | ||
| pt_kernel_context_->MutableOutputAt<pten::DenseTensor>(i + j)); | ||
| pt_kernel_context_->MutableOutputAt<pten::DenseTensor>(start_idx + | ||
| j)); | ||
| } else { | ||
| pt_kernel_context_->EmplaceBackOutputWithoutSetRange( | ||
| experimental::MakePtenTensorBaseFromVar(outs_vector[j], out_def)); | ||
| } | ||
| // TODO(chenweihang): adapt multi-output case later | ||
| } | ||
| pt_kernel_context_->MutableOutputRangeAt(i) = | ||
| std::make_pair(i, i + outs_vector.size()); | ||
| std::make_pair(start_idx, end_idx); | ||
| } else { | ||
| PADDLE_THROW(platform::errors::PreconditionNotMet( | ||
| "error start index when trying to set new tensor to inputs, start " | ||
| "index is `%d`, but current pt_kernel_context_.outputs.size() is " | ||
| "`%d` ", | ||
|
||
| start_idx, pt_kernel_context_->OutputsSize())); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -1882,6 +1913,16 @@ void OperatorWithKernel::BuildPtenKernelContext( | |
| pt_kernel_context_->EmplaceBackAttr(BOOST_GET_CONST(float, attr)); | ||
| } else if (attr_defs[i].type_index == std::type_index(typeid(bool))) { | ||
| pt_kernel_context_->EmplaceBackAttr(BOOST_GET_CONST(bool, attr)); | ||
| } else if (attr_defs[i].type_index == | ||
| std::type_index(typeid(pten::DataType))) { | ||
| auto data_type = pten::TransToPtenDataType( | ||
| static_cast<framework::proto::VarType::Type>( | ||
| BOOST_GET_CONST(int, attr))); | ||
| pt_kernel_context_->EmplaceBackAttr(data_type); | ||
| } else if (attr_defs[i].type_index == | ||
| std::type_index(typeid(std::vector<int>))) { | ||
| pt_kernel_context_->EmplaceBackAttr( | ||
| BOOST_GET_CONST(std::vector<int>, attr)); | ||
| } else if (attr_defs[i].type_index == | ||
| std::type_index(typeid(std::vector<int64_t>)) && | ||
| std::type_index(attr.type()) == | ||
|
|
@@ -1901,5 +1942,26 @@ void OperatorWithKernel::BuildPtenKernelContext( | |
| } | ||
| } | ||
|
|
||
| void OperatorWithKernel::WriteBackToOutputs(RuntimeContext* ctx) const { | ||
| // auto& input_names = std::get<0>(pt_kernel_signature_->args); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 如果是无用的注释,建议在下个PR移除
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 好的 |
||
| // auto& attr_names = std::get<1>(pt_kernel_signature_->args); | ||
| auto& output_names = std::get<2>(pt_kernel_signature_->args); | ||
|
|
||
| // pt_kernel_context_ | ||
|
|
||
| for (size_t i = 0; i < output_names.size(); ++i) { | ||
| auto& outs_vector = ctx->outputs.at(output_names[i]); | ||
|
|
||
| auto& range_pair = pt_kernel_context_->OutputRangeAt(i); | ||
| auto pten_outs = | ||
| pt_kernel_context_->MutableOutputBetween<pten::DenseTensor>( | ||
| range_pair.first, range_pair.second); | ||
|
|
||
| for (size_t j = 0; j < pten_outs.size(); ++j) { | ||
| experimental::MakeVariableFromPtenTensor(pten_outs[j], outs_vector[j]); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| } // namespace framework | ||
| } // namespace paddle | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -209,5 +209,7 @@ void Tensor::ResetHolderWithType(std::shared_ptr<memory::Allocation> holder, | |
| type_ = type; | ||
| } | ||
|
|
||
// Overwrites the tensor's recorded element type (`type_`) with `type`.
// The body assigns only `type_`; nothing else is modified by this call.
| void Tensor::setType(const proto::VarType::Type type) { type_ = type; } | ||
|
||
|
|
||
| } // namespace framework | ||
| } // namespace paddle | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -295,52 +295,76 @@ static void BuildDygraphPtenKernelContext( | |
| for (size_t i = 0; i < input_names.size(); ++i) { | ||
| auto& in_def = input_defs.at(i); | ||
| auto& ins_vector = ins.at(input_names[i]); | ||
| if (kernel_ctx->InputsSize() <= i) { | ||
|
|
||
| size_t start_idx = (i == 0 ? 0 : kernel_ctx->InputRangeAt(i - 1).second); | ||
| size_t end_idx = start_idx + ins_vector.size(); | ||
|
|
||
| if (kernel_ctx->InputsSize() == start_idx) { | ||
| paddle::SmallVector<std::shared_ptr<pten::TensorBase>> tmp_inputs; | ||
| for (const auto& var : ins_vector) { | ||
| const auto& variable = var->Var(); | ||
| tmp_inputs.emplace_back( | ||
| experimental::MakePtenTensorBaseFromVar(variable, in_def)); | ||
| } | ||
| kernel_ctx->EmplaceBackInputs(std::move(tmp_inputs)); | ||
| } else { | ||
| } else if (kernel_ctx->InputsSize() > start_idx) { | ||
| size_t input_size = kernel_ctx->InputsSize(); | ||
| for (size_t j = 0; j < ins_vector.size(); ++j) { | ||
| if (input_size > i + j) { | ||
| if (input_size > start_idx + j) { | ||
| experimental::ReMakePtenDenseTensorFromVar( | ||
| ins_vector[j]->Var(), in_def, | ||
| kernel_ctx->MutableInputAt<pten::DenseTensor>(i + j)); | ||
| kernel_ctx->MutableInputAt<pten::DenseTensor>(start_idx + j)); | ||
| } else { | ||
| kernel_ctx->EmplaceBackInputWithoutSetRange( | ||
| experimental::MakePtenTensorBaseFromVar(ins_vector[j]->Var(), | ||
| in_def)); | ||
| } | ||
| // TODO(chenweihang): adapt multi-input case later | ||
| } | ||
| kernel_ctx->MutableInputRangeAt(i) = | ||
| std::make_pair(i, i + ins_vector.size()); | ||
| kernel_ctx->MutableInputRangeAt(i) = std::make_pair(start_idx, end_idx); | ||
| } else { | ||
| PADDLE_THROW(platform::errors::PreconditionNotMet( | ||
| "error start index when trying to set new tensor to inputs, start " | ||
| "index is `%d`, but current pt_kernel_context_.inputs.size() is " | ||
| "`%d` ", | ||
|
||
| start_idx, kernel_ctx->InputsSize())); | ||
| } | ||
| } | ||
|
|
||
| for (size_t i = 0; i < output_names.size(); ++i) { | ||
| auto& out_def = output_defs.at(i); | ||
| auto& outs_vector = outs.at(output_names[i]); | ||
| if (kernel_ctx->OutputsSize() <= i) { | ||
|
|
||
| size_t start_idx = (i == 0 ? 0 : kernel_ctx->OutputRangeAt(i - 1).second); | ||
| size_t end_idx = start_idx + outs_vector.size(); | ||
|
|
||
| if (kernel_ctx->OutputsSize() == start_idx) { | ||
| paddle::SmallVector<std::shared_ptr<pten::TensorBase>> tmp_outputs; | ||
| for (auto& var : outs_vector) { | ||
| auto* variable = var->MutableVar(); | ||
| tmp_outputs.emplace_back( | ||
| experimental::MakePtenTensorBaseFromVar(variable, out_def)); | ||
| } | ||
| kernel_ctx->EmplaceBackOutputs(std::move(tmp_outputs)); | ||
| } else { | ||
| } else if (kernel_ctx->OutputsSize() > start_idx) { | ||
| size_t output_size = kernel_ctx->OutputsSize(); | ||
| for (size_t j = 0; j < outs_vector.size(); ++j) { | ||
| if (output_size > i + j) { | ||
| experimental::ReMakePtenDenseTensorFromVar( | ||
| outs_vector[j]->MutableVar(), out_def, | ||
| kernel_ctx->MutableOutputAt<pten::DenseTensor>(i + j)); | ||
| } else { | ||
| kernel_ctx->EmplaceBackOutputWithoutSetRange( | ||
| experimental::MakePtenTensorBaseFromVar( | ||
| outs_vector[j]->MutableVar(), out_def)); | ||
| } | ||
| // TODO(chenweihang): adapt multi-output case later | ||
| } | ||
| kernel_ctx->MutableOutputRangeAt(i) = | ||
| std::make_pair(i, i + outs_vector.size()); | ||
| kernel_ctx->MutableOutputRangeAt(i) = std::make_pair(start_idx, end_idx); | ||
| } else { | ||
| PADDLE_THROW(platform::errors::PreconditionNotMet( | ||
| "error start index when trying to set new tensor to inputs, start " | ||
| "index is `%d`, but current pt_kernel_context_.outputs.size() is " | ||
| "`%d` ", | ||
|
||
| start_idx, kernel_ctx->OutputsSize())); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -371,6 +395,15 @@ static void BuildDygraphPtenKernelContext( | |
| kernel_ctx->EmplaceBackAttr(BOOST_GET_CONST(float, attr)); | ||
| } else if (attr_defs[i].type_index == std::type_index(typeid(bool))) { | ||
| kernel_ctx->EmplaceBackAttr(BOOST_GET_CONST(bool, attr)); | ||
| } else if (attr_defs[i].type_index == | ||
| std::type_index(typeid(pten::DataType))) { | ||
| auto data_type = pten::TransToPtenDataType( | ||
| static_cast<framework::proto::VarType::Type>( | ||
| BOOST_GET_CONST(int, attr))); | ||
| kernel_ctx->EmplaceBackAttr(data_type); | ||
| } else if (attr_defs[i].type_index == | ||
| std::type_index(typeid(std::vector<int>))) { | ||
| kernel_ctx->EmplaceBackAttr(BOOST_GET_CONST(std::vector<int>, attr)); | ||
| } else if (attr_defs[i].type_index == | ||
| std::type_index(typeid(std::vector<int64_t>)) && | ||
| std::type_index(attr.type()) == | ||
|
|
@@ -390,6 +423,26 @@ static void BuildDygraphPtenKernelContext( | |
| } | ||
| } | ||
|
|
||
// Hands every pten output tensor stored in `kernel_ctx` back to the matching
// output variable in `outs` by calling MakeVariableFromPtenTensor on each pair.
//
// pt_kernel_signature: its `args` tuple is read at index 2 to obtain the
//                      output names.
// outs:                map from output name to the vector of output variables;
//                      looked up with `.at()`, so a missing name throws.
// kernel_ctx:          kernel context whose per-output ranges and tensors are
//                      read.
| template <typename VarType> | ||
| static void WriteBackToOutputs( | ||
| const framework::KernelSignature& pt_kernel_signature, | ||
| const NameVarMap<VarType>& outs, pten::KernelContext* kernel_ctx) { | ||
// Element 2 of the signature's args tuple is the list of output names.
| auto& output_names = std::get<2>(pt_kernel_signature.args); | ||
|
|
||
| for (size_t i = 0; i < output_names.size(); ++i) { | ||
| auto& outs_vector = outs.at(output_names[i]); | ||
|
|
||
// The i-th range gives the slice of the context's flat output list that
// belongs to this named output.
// NOTE(review): presumably half-open [first, second), matching how the
// ranges are built as (start_idx, start_idx + size) — confirm.
| auto& range_pair = kernel_ctx->OutputRangeAt(i); | ||
| auto pten_outs = kernel_ctx->MutableOutputBetween<pten::DenseTensor>( | ||
| range_pair.first, range_pair.second); | ||
|
|
||
// The loop bound is pten_outs.size(), and outs_vector is indexed with the
// same j without a size check.
// NOTE(review): assumes outs_vector has at least as many entries — confirm.
| for (size_t j = 0; j < pten_outs.size(); ++j) { | ||
| experimental::MakeVariableFromPtenTensor(pten_outs[j], | ||
| outs_vector[j]->MutableVar()); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| template <typename VarType> | ||
| static void PreparedOpRunImpl( | ||
| const framework::OperatorBase& op, const framework::RuntimeContext& ctx, | ||
|
|
@@ -463,6 +516,20 @@ static void PreparedOpRunPtImpl( | |
|
|
||
| pt_kernel(pt_kernel_context); | ||
|
|
||
| if (FLAGS_benchmark) { | ||
| dev_ctx->Wait(); | ||
| #if defined(PADDLE_WITH_CUDA) | ||
| PADDLE_ENFORCE_CUDA_SUCCESS(cudaGetLastError()); | ||
| VLOG(4) << "Operator(" << op.Type() << "): context wait and get last error"; | ||
| #endif | ||
| #if defined(PADDLE_WITH_HIP) | ||
| PADDLE_ENFORCE_CUDA_SUCCESS(hipGetLastError()); | ||
| VLOG(4) << "Operator(" << op.Type() << "): context wait and get last error"; | ||
| #endif | ||
| } | ||
|
|
||
| WriteBackToOutputs<VarType>(pt_kernel_signature, outs, pt_kernel_context); | ||
|
|
||
| // Ensure that it does not affect the VarBase life cycle management | ||
| pt_kernel_context->ClearData(); | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -552,14 +552,13 @@ class Reshape2Op : public ReshapeOp { | |
| const framework::ExecutionContext &ctx) const override { | ||
| auto multi_inputs = ctx.MultiInput<framework::Tensor>("ShapeTensor"); | ||
| if (multi_inputs.size() > 0) { | ||
| return framework::KernelSignature( | ||
| "reshape2.mulhost.mid", {"X", "ShapeTensor"}, {}, {"XShape", "Out"}); | ||
| return framework::KernelSignature("reshape2.mulhost", | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 为什么XShape输出可以删除?这里mul和host后缀最好拆分一下
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 和负责的rd 沟通过,先保持现状 |
||
| {"X", "ShapeTensor"}, {}, {"Out"}); | ||
| } else if (ctx.HasInput("Shape")) { | ||
| return framework::KernelSignature("reshape2.host.mid", {"X", "Shape"}, {}, | ||
| {"XShape", "Out"}); | ||
| return framework::KernelSignature("reshape2.host", {"X", "Shape"}, {}, | ||
| {"Out"}); | ||
| } else { | ||
| return framework::KernelSignature("reshape2.mid", {"X"}, {"shape"}, | ||
| {"XShape", "Out"}); | ||
| return framework::KernelSignature("reshape2", {"X"}, {"shape"}, {"Out"}); | ||
| } | ||
| } | ||
| }; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,6 +21,8 @@ namespace experimental { | |
|
|
||
| PD_DLL_DECL Tensor flatten(const Tensor& x, int start_axis, int stop_axis); | ||
|
|
||
| Tensor cast(const Tensor& x, DataType out_dtype); | ||
|
||
|
|
||
| PD_DLL_DECL Tensor reshape(const Tensor& x, const std::vector<int64_t>& shape); | ||
| } // namespace experimental | ||
| } // namespace paddle | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
这部分逻辑建议加上详细注释,方便代码阅读理解
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
已经添加注释