
Commit b6b3242

ascend transpose

Paddle CI_MAC committed
1 parent 0310945 commit b6b3242

File tree

3 files changed: +274 −0 lines changed

paddle/fluid/operators/CMakeLists.txt

Lines changed: 4 additions & 0 deletions

@@ -160,6 +160,10 @@ set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library")
 add_subdirectory(benchmark)

 cc_test(op_debug_string_test SRCS op_debug_string_test.cc DEPS elementwise_add_op)
+if (WITH_ASCEND_CL)
+    cc_test(transpose_op_npu_test SRCS transpose_op_npu_test.cc DEPS op_registry transpose_op scope device_context enforce executor)
+endif()
+
 if(WITH_MKLDNN)
     include(mkldnn/inplace_op_tests.cmake)

paddle/fluid/operators/transpose_op_npu.cc
Lines changed: 121 additions & 0 deletions

@@ -0,0 +1,121 @@
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#ifdef PADDLE_WITH_ASCEND_CL
#include <memory>
#include <string>
#include <vector>

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/npu_op_runner.h"

namespace paddle {
namespace operators {

template <typename DeviceContext, typename T>
class TransposeNPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* x = ctx.Input<framework::LoDTensor>("X");
    auto* out = ctx.Output<framework::LoDTensor>("Out");
    std::vector<int> axis = ctx.Attr<std::vector<int>>("axis");
    // Allocate the output before handing it to the NPU runner.
    out->mutable_data<T>(ctx.GetPlace());
    framework::NPUAttributeMap attr_input = {{"perm", axis}};
    auto runner = NpuOpRunner("TransposeD", {*x}, {*out}, attr_input);
    auto stream =
        ctx.template device_context<paddle::platform::NPUDeviceContext>()
            .stream();
    runner.Run(stream);
  }
};

template <typename T>
class TransposeGradNPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* out_grad =
        ctx.Input<framework::LoDTensor>(framework::GradVarName("Out"));
    auto* x_grad =
        ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));
    std::vector<int> axis = ctx.Attr<std::vector<int>>("axis");
    // The gradient of a transpose is a transpose by the inverse permutation:
    // the forward pass moved input dim axis[i] to output dim i, so the
    // backward pass must move it back.
    std::vector<int> reversed_axis(axis);
    for (size_t i = 0; i < axis.size(); i++) {
      reversed_axis[axis[i]] = static_cast<int>(i);
    }
    x_grad->mutable_data<T>(ctx.GetPlace());
    framework::NPUAttributeMap attr_input = {{"perm", reversed_axis}};
    auto runner = NpuOpRunner("TransposeD", {*out_grad}, {*x_grad}, attr_input);
    auto stream =
        ctx.template device_context<paddle::platform::NPUDeviceContext>()
            .stream();
    runner.Run(stream);
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

REGISTER_OP_NPU_KERNEL(
    transpose,
    ops::TransposeNPUKernel<paddle::platform::NPUDeviceContext, float>,
    ops::TransposeNPUKernel<paddle::platform::NPUDeviceContext,
                            paddle::platform::float16>,
    ops::TransposeNPUKernel<paddle::platform::NPUDeviceContext, int>,
    ops::TransposeNPUKernel<paddle::platform::NPUDeviceContext, uint8_t>,
    ops::TransposeNPUKernel<paddle::platform::NPUDeviceContext, int8_t>);

REGISTER_OP_NPU_KERNEL(
    transpose_grad,
    ops::TransposeGradNPUKernel<float>,
    ops::TransposeGradNPUKernel<paddle::platform::float16>,
    ops::TransposeGradNPUKernel<int>,
    ops::TransposeGradNPUKernel<uint8_t>,
    ops::TransposeGradNPUKernel<int8_t>);

#endif
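The inverse-permutation step is the heart of the grad kernel above. As a quick illustration, here is a minimal host-only sketch (not part of this commit; it assumes nothing beyond the C++ standard library) of the same reversed_axis construction:

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  // Forward transpose: output dim i takes input dim axis[i].
  std::vector<int> axis = {1, 2, 0};
  // Invert it: reversed_axis[axis[i]] = i, exactly as in
  // TransposeGradNPUKernel.
  std::vector<int> reversed_axis(axis.size());
  for (size_t i = 0; i < axis.size(); ++i) {
    reversed_axis[axis[i]] = static_cast<int>(i);
  }
  // Prints 2 0 1: applying perm {1, 2, 0} and then {2, 0, 1} restores
  // the original dimension order.
  for (int v : reversed_axis) std::cout << v << ' ';
  std::cout << '\n';
  return 0;
}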
paddle/fluid/operators/transpose_op_npu_test.cc

Lines changed: 149 additions & 0 deletions

@@ -0,0 +1,149 @@
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#ifndef _WIN32
#include <unistd.h>
#endif

#include <cmath>
#include <iostream>
#include <numeric>
#include <string>
#include <thread>  // NOLINT
#include <vector>

#include "gtest/gtest.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/operators/dropout_op.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/string/printf.h"

namespace f = paddle::framework;
namespace p = paddle::platform;
namespace m = paddle::operators::math;

USE_OP(transpose);
USE_OP_DEVICE_KERNEL(transpose, NPU);

template <typename T>
void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
  // Set up input and output variables in the scope.
  auto x = scope->Var("X");
  auto out = scope->Var("Out");
  auto* x_t = x->GetMutable<f::LoDTensor>();
  auto* out_t = out->GetMutable<f::LoDTensor>();
  auto place = ctx.GetPlace();

  int dim0 = 2;
  int dim1 = 2;
  TensorFromVector(std::vector<T>({0, 1, 2, 3}), ctx, x_t);
  ctx.Wait();
  x_t->Resize({dim0, dim1});
  out_t->Resize({dim0, dim1});
  out_t->mutable_data<T>(place);
  ctx.Wait();

  f::AttributeMap attrs = {{"axis", std::vector<int>({1, 0})},
                           {"data_format", std::string("AnyLayout")}};
  auto op = f::OpRegistry::CreateOp("transpose", {{"X", {"X"}}},
                                    {{"Out", {"Out"}}}, attrs);
  op->Run(*scope, place);
  ctx.Wait();

  std::vector<T> out_v;
  TensorToVector(*out_t, ctx, &out_v);
  ctx.Wait();

  // Transposing the row-major 2x2 input [[0, 1], [2, 3]] with perm {1, 0}
  // yields [[0, 2], [1, 3]].
  EXPECT_EQ(out_t->numel(), dim0 * dim1);
  EXPECT_EQ(out_v[0], 0);
  EXPECT_EQ(out_v[1], 2);
  EXPECT_EQ(out_v[2], 1);
  EXPECT_EQ(out_v[3], 3);
}

template <typename T>
void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx) {
  // Set up forward and gradient variables in the scope.
  auto x = scope->Var("X");
  auto x_grad = scope->Var("X@GRAD");
  auto out = scope->Var("Out");
  auto out_grad = scope->Var("Out@GRAD");

  auto* x_grad_t = x_grad->GetMutable<f::LoDTensor>();
  auto* x_t = x->GetMutable<f::LoDTensor>();
  auto* out_grad_t = out_grad->GetMutable<f::LoDTensor>();
  auto* out_t = out->GetMutable<f::LoDTensor>();
  int dim0 = 2;
  int dim1 = 2;
  auto place = ctx.GetPlace();

  TensorFromVector(std::vector<T>({0, 1, 2, 3}), ctx, out_grad_t);
  TensorFromVector(std::vector<T>({0, 1, 2, 3}), ctx, x_t);
  ctx.Wait();
  x_grad_t->Resize({dim0, dim1});
  x_t->Resize({dim0, dim1});
  out_grad_t->Resize({dim0, dim1});
  out_t->Resize({dim0, dim1});

  x_grad_t->mutable_data<T>(place);
  out_t->mutable_data<T>(place);
  ctx.Wait();

  f::AttributeMap attrs = {{"axis", std::vector<int>({1, 0})},
                           {"data_format", std::string("AnyLayout")}};
  auto op = f::OpRegistry::CreateOp(
      "transpose_grad",
      {{"Out@GRAD", {"Out@GRAD"}}, {"X", {"X"}}, {"Out", {"Out"}}},
      {{"X@GRAD", {"X@GRAD"}}}, attrs);
  op->Run(*scope, place);
  ctx.Wait();

  std::vector<T> out_v;
  TensorToVector(*x_grad_t, ctx, &out_v);
  ctx.Wait();

  // The grad kernel applies the inverse permutation to Out@GRAD; for a 2-D
  // transpose the permutation {1, 0} is its own inverse.
  EXPECT_EQ(x_grad_t->numel(), dim0 * dim1);
  EXPECT_EQ(out_v[0], 0);
  EXPECT_EQ(out_v[1], 2);
  EXPECT_EQ(out_v[2], 1);
  EXPECT_EQ(out_v[3], 3);
}

TEST(transpose, NPU_fp32) {
  f::Scope scope;
  p::NPUDeviceContext ctx(p::NPUPlace(0));
  Compare<float>(&scope, ctx);
}

TEST(transpose_grad, NPU_fp32) {
  f::Scope scope;
  p::NPUDeviceContext ctx(p::NPUPlace(0));
  CompareGrad<float>(&scope, ctx);
}
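For reference, a minimal host-only sketch (not part of this commit; plain standard C++) that reproduces the values asserted in Compare<float> above by transposing a row-major 2x2 buffer with perm {1, 0}:

#include <cassert>
#include <vector>

int main() {
  const int dim0 = 2, dim1 = 2;
  std::vector<float> x = {0, 1, 2, 3};  // row-major [[0, 1], [2, 3]]
  std::vector<float> out(dim0 * dim1);
  // Transpose with perm {1, 0}: out[j][i] = x[i][j].
  for (int i = 0; i < dim0; ++i) {
    for (int j = 0; j < dim1; ++j) {
      out[j * dim0 + i] = x[i * dim1 + j];
    }
  }
  // Matches the EXPECT_EQ checks in Compare<float>: {0, 2, 1, 3}.
  assert(out[0] == 0 && out[1] == 2 && out[2] == 1 && out[3] == 3);
  return 0;
}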
