From b8d1789f410595faab7df68ba08af73c984f1583 Mon Sep 17 00:00:00 2001
From: Meiyim
Date: Thu, 11 Mar 2021 13:43:27 +0800
Subject: [PATCH 1/7] [npu] support npu for kernel `transpose`

---
 paddle/fluid/operators/transpose_op_npu.cc    |  83 ++++++++++
 .../fluid/operators/transpose_op_npu_test.cc  | 149 ++++++++++++++++++
 .../unittests/npu/test_transpose_op_npu.py    |  88 +++++++++++
 3 files changed, 320 insertions(+)
 create mode 100644 paddle/fluid/operators/transpose_op_npu.cc
 create mode 100644 paddle/fluid/operators/transpose_op_npu_test.cc
 create mode 100644 python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py

diff --git a/paddle/fluid/operators/transpose_op_npu.cc b/paddle/fluid/operators/transpose_op_npu.cc
new file mode 100644
index 00000000000000..1a680097a9834d
--- /dev/null
+++ b/paddle/fluid/operators/transpose_op_npu.cc
@@ -0,0 +1,83 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef PADDLE_WITH_ASCEND_CL
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "paddle/fluid/operators/npu_op_runner.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/expand_op.h"
+
+namespace paddle {
+namespace operators {
+
+template <typename T>
+class TransposeNPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* x = ctx.Input<framework::LoDTensor>("X");
+    auto* out = ctx.Output<framework::LoDTensor>("Out");
+    std::vector<int> axis = ctx.Attr<std::vector<int>>("axis");
+    framework::NPUAttributeMap attr_input = {{"perm", axis}};
+    auto runner = NpuOpRunner("TransposeD", {*x}, {*out}, attr_input);
+    auto stream = ctx.template device_context<paddle::platform::NPUDeviceContext>().stream();
+    runner.Run(stream);
+    //ctx.device_context().Wait();
+
+  }
+};
+
+template <typename T>
+class TransposeGradNPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext &ctx) const override {
+    auto* out_grad = ctx.Input<framework::LoDTensor>(framework::GradVarName("Out"));
+    auto* x_grad = ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));
+    std::vector<int> axis = ctx.Attr<std::vector<int>>("axis");
+    std::vector<int> reversed_axis(axis);
+    for (size_t i = 0; i < axis.size(); i++) {
+      reversed_axis[axis[i]] = i;
+    }
+
+    framework::NPUAttributeMap attr_input = {{"perm", reversed_axis}};
+    auto runner = NpuOpRunner("TransposeD", {*out_grad}, {*x_grad}, attr_input);
+    auto stream = ctx.template device_context<paddle::platform::NPUDeviceContext>().stream();
+    runner.Run(stream);
+  }
+};
+
+}
+}
+
+namespace ops = paddle::operators;
+
+REGISTER_OP_NPU_KERNEL(transpose,
+    ops::TransposeNPUKernel<float>,
+    ops::TransposeNPUKernel<paddle::platform::float16>,
+    ops::TransposeNPUKernel<int>,
+    ops::TransposeNPUKernel<uint8_t>,
+    ops::TransposeNPUKernel<int8_t>
+);
+
+REGISTER_OP_NPU_KERNEL(transpose_grad,
+    ops::TransposeGradNPUKernel<float>,
+    ops::TransposeGradNPUKernel<paddle::platform::float16>,
+    ops::TransposeGradNPUKernel<int>,
+    ops::TransposeGradNPUKernel<uint8_t>,
+    ops::TransposeGradNPUKernel<int8_t>
+);
+
+
+
+#endif
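A note on the gradient kernel above: the `reversed_axis` loop computes the inverse permutation of `axis`. If `TransposeD` with perm `axis` moves input dimension `axis[i]` to output dimension `i`, then running it again with `reversed_axis` (where `reversed_axis[axis[i]] = i`) maps every dimension back, so the backward pass is just another `TransposeD` call. A minimal host-only sketch of that identity (plain standard C++, no Paddle or NPU dependency; the function name is illustrative and not part of the patch):

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// Invert a permutation: if perm[i] == j, the inverse maps j back to i.
// This mirrors the reversed_axis loop in TransposeGradNPUKernel.
std::vector<int> InversePermutation(const std::vector<int>& perm) {
  std::vector<int> inv(perm.size());
  for (size_t i = 0; i < perm.size(); ++i) {
    inv[perm[i]] = static_cast<int>(i);
  }
  return inv;
}

int main() {
  const std::vector<int> axis = {0, 2, 1, 3};
  const std::vector<int> inv = InversePermutation(axis);
  // Composing a permutation with its inverse yields the identity.
  for (size_t i = 0; i < axis.size(); ++i) {
    assert(inv[axis[i]] == static_cast<int>(i));
  }
  return 0;
}
```

One caveat: the perms exercised by the tests in this series, {1, 0} and {0, 2, 1, 3}, are both self-inverse, so they cannot distinguish a perm from its inverse; a perm such as {1, 2, 0} would.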
diff --git a/paddle/fluid/operators/transpose_op_npu_test.cc b/paddle/fluid/operators/transpose_op_npu_test.cc
new file mode 100644
index 00000000000000..25f95f5c7780f2
--- /dev/null
+++ b/paddle/fluid/operators/transpose_op_npu_test.cc
@@ -0,0 +1,149 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifndef _WIN32
+#include <unistd.h>
+#endif
+
+#include <memory>
+#include <string>
+#include <thread>  // NOLINT
+#include <vector>
+#include <iostream>
+#include <numeric>
+
+#include "gtest/gtest.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/operator.h"
+#include "paddle/fluid/framework/program_desc.h"
+#include "paddle/fluid/operators/dropout_op.h"
+#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/fluid/string/printf.h"
+
+namespace f = paddle::framework;
+namespace p = paddle::platform;
+namespace m = paddle::operators::math;
+
+USE_OP(transpose);
+USE_OP_DEVICE_KERNEL(transpose, NPU);
+
+
+template <typename T>
+void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
+  // init
+  auto x = scope->Var("X");
+  auto out = scope->Var("Out");
+  auto* x_t = x->GetMutable<f::LoDTensor>();
+  auto* out_t = out->GetMutable<f::LoDTensor>();
+  auto place = ctx.GetPlace();
+
+  int dim0=2;
+  int dim1=3;
+  TensorFromVector(std::vector<T>({0,1,2,3,4,5}), ctx, x_t);
+  ctx.Wait();
+  x_t->Resize({dim0, dim1});
+  out_t->Resize({dim0, dim1});
+  ctx.Wait();
+  out_t->mutable_data<T>(place);
+  ctx.Wait();
+
+  f::AttributeMap attrs = {
+      {"axis", std::vector<int>({1, 0})},
+      {"data_format", std::string("AnyLayout")}
+  };
+  auto op = f::OpRegistry::CreateOp("transpose", {{"X", {"X"}}},
+                                    {{"Out", {"Out"}}}, attrs);
+  ctx.Wait();
+  op->Run(*scope, place);
+  ctx.Wait();
+  std::vector<T> out_v;
+  TensorToVector(*out_t, ctx, &out_v);
+  ctx.Wait();
+
+  EXPECT_EQ(out_t->numel(), dim0 * dim1);
+  EXPECT_EQ(out_v[0], 0);
+  EXPECT_EQ(out_v[1], 3);
+  EXPECT_EQ(out_v[2], 1);
+  EXPECT_EQ(out_v[3], 4);
+  EXPECT_EQ(out_v[4], 2);
+  EXPECT_EQ(out_v[5], 5);
+}
+
+
+template <typename T>
+void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx) {
+  // init
+  auto x = scope->Var("X");
+  auto x_grad = scope->Var("X@GRAD");
+  auto out = scope->Var("Out");
+  auto out_grad = scope->Var("Out@GRAD");
+
+  auto* x_grad_t = x_grad->GetMutable<f::LoDTensor>();
+  auto* x_t = x->GetMutable<f::LoDTensor>();
+  auto* out_grad_t = out_grad->GetMutable<f::LoDTensor>();
+  auto* out_t = out->GetMutable<f::LoDTensor>();
+  int dim0=2;
+  int dim1=3;
+  auto place = ctx.GetPlace();
+
+  TensorFromVector(std::vector<T>({0,1,2,3,4,5}), ctx, out_grad_t);
+  TensorFromVector(std::vector<T>({0,1,2,3,4,5}), ctx, x_t);
+  ctx.Wait();
+  x_grad_t->Resize({dim0, dim1});
+  x_t->Resize({dim0, dim1});
+  out_grad_t->Resize({dim0, dim1});
+  out_t->Resize({dim0, dim1});
+
+  //out_grad_t->mutable_data<T>(place);
+  x_grad_t->mutable_data<T>(place);
+  out_t->mutable_data<T>(place);
+  ctx.Wait();
+
+  f::AttributeMap attrs = {
+      {"axis", std::vector<int>({1, 0})},
+      {"data_format", std::string("AnyLayout")}
+  };
+  /*
+  {"mkldnn_data_type", "float32"},
+  {"use_mkldnn", false},
+  {"use_quantizer", false},
+  */
+  auto op = f::OpRegistry::CreateOp("transpose_grad", {{"Out@GRAD", {"Out@GRAD"}}, {"X", {"X"}}, {"Out", {"Out"}}},
+                                    {{"X@GRAD", {"X@GRAD"}}}, attrs);
+  op->Run(*scope, place);
+  ctx.Wait();
+  std::vector<T> out_v;
+  TensorToVector(*x_grad_t, ctx, &out_v);
+  ctx.Wait();
+
+  EXPECT_EQ(x_grad_t->numel(), dim0 * dim1);
+  EXPECT_EQ(out_v[0], 0);
+  EXPECT_EQ(out_v[1], 3);
+  EXPECT_EQ(out_v[2], 1);
+  EXPECT_EQ(out_v[3], 4);
+  EXPECT_EQ(out_v[4], 2);
+  EXPECT_EQ(out_v[5], 5);
+
+}
+
+
+TEST(transpose, NPU_fp32) {
+  f::Scope scope;
+  p::NPUDeviceContext ctx(p::NPUPlace(0));
+  Compare<float>(&scope, ctx);
+}
+
+TEST(transpose_grad, NPU_fp32) {
+  f::Scope scope;
+  p::NPUDeviceContext ctx(p::NPUPlace(0));
+  CompareGrad<float>(&scope, ctx);
+}
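The hard-coded expectations in `Compare` follow from row-major layout: {0, 1, 2, 3, 4, 5} viewed as a 2x3 matrix is [[0, 1, 2], [3, 4, 5]], and perm {1, 0} produces the 3x2 matrix [[0, 3], [1, 4], [2, 5]], which flattens to {0, 3, 1, 4, 2, 5}. A host-only sketch that reproduces those numbers without an NPU (illustrative standard C++, not part of the patch):

```cpp
#include <cassert>
#include <vector>

// Transpose a row-major dim0 x dim1 matrix stored as a flat vector.
std::vector<int> Transpose2D(const std::vector<int>& in, int dim0, int dim1) {
  std::vector<int> out(in.size());
  for (int i = 0; i < dim0; ++i) {
    for (int j = 0; j < dim1; ++j) {
      out[j * dim0 + i] = in[i * dim1 + j];  // element (i, j) -> (j, i)
    }
  }
  return out;
}

int main() {
  const std::vector<int> x = {0, 1, 2, 3, 4, 5};         // 2x3 input, as in Compare
  const std::vector<int> expected = {0, 3, 1, 4, 2, 5};  // values asserted by the test
  assert(Transpose2D(x, 2, 3) == expected);
  return 0;
}
```

The same values explain `CompareGrad`: a two-dimensional swap is its own inverse, so the backward pass applies the identical remapping to `Out@GRAD`.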
diff --git a/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py
new file mode 100644
index 00000000000000..931b1942b18ef3
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py
@@ -0,0 +1,88 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import numpy as np
+import unittest
+import sys
+sys.path.append("..")
+from op_test import OpTest, _set_use_system_allocator
+import paddle
+import paddle.fluid as fluid
+
+paddle.enable_static()
+
+
+@unittest.skipIf(not paddle.is_compiled_with_npu(),
+                 "core is not compiled with NPU")
+class TestTransposeOp(OpTest):
+    def setUp(self):
+        self.set_npu()
+        self.op_type = "transpose"
+        self.place = paddle.NPUPlace(0)
+        self.init_dtype()
+        self.init_input_output()
+        self.init_kernel_type()
+        self.init_axis()
+
+        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(self.x)}
+        self.attrs = {'axis': [0, 2, 1, 3], 'data_format': 'AnyLayout'}
+        self.outputs = {'Out': self.out}
+
+    def set_npu(self):
+        self.__class__.use_npu = True
+
+    def init_kernel_type(self):
+        self.use_mkldnn = False
+
+    def init_input_output(self):
+        self.x = np.random.uniform(0.1, 1, [8, 512, 12, 64]).astype(self.dtype)
+        self.out = np.transpose(self.x, [0, 2, 1, 3])
+
+    def init_dtype(self):
+        self.dtype = np.float32
+
+    def init_axis(self):
+        self.axis = -1
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place, check_dygraph=False)
+
+    # TODO(ascendrc): Test grad op after it is implemented.
+    # def test_check_grad_normal(self):
+    #     self.check_grad_with_place(
+    #         self.place, ['X', 'Y'],
+    #         'Out',
+    #         max_relative_error=0.006,
+    #         check_dygraph=False)
+    #
+    # def test_check_grad_ingore_x(self):
+    #     self.check_grad_with_place(
+    #         self.place, ['Y'],
+    #         'Out',
+    #         no_grad_set=set("X"),
+    #         max_relative_error=0.006,
+    #         check_dygraph=False)
+    #
+    # def test_check_grad_ingore_y(self):
+    #     self.check_grad_with_place(
+    #         self.place, ['X'],
+    #         'Out',
+    #         no_grad_set=set("Y"),
+    #         max_relative_error=0.006, check_dygraph=False)
+
+
+if __name__ == '__main__':
+    unittest.main()

From e7f149b3c727f9f40d9a2b6980ef9bb771357835 Mon Sep 17 00:00:00 2001
From: Meiyim
Date: Fri, 12 Mar 2021 14:04:39 +0800
Subject: [PATCH 2/7] +cmake

---
 paddle/fluid/operators/CMakeLists.txt | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt
index 17234edb116e3e..203a2fb0807411 100644
--- a/paddle/fluid/operators/CMakeLists.txt
+++ b/paddle/fluid/operators/CMakeLists.txt
@@ -160,6 +160,10 @@ set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library")
 add_subdirectory(benchmark)
 
 cc_test(op_debug_string_test SRCS op_debug_string_test.cc DEPS elementwise_add_op)
 
+if (WITH_ASCEND_CL)
+    cc_test(transpose_op_npu_test SRCS transpose_op_npu_test.cc DEPS op_registry transpose_op scope device_context enforce executor)
+endif()
+
 if(WITH_MKLDNN)
     include(mkldnn/inplace_op_tests.cmake)

From 38884f71f0556bc6bd2fdd04e86e31230ef05e81 Mon Sep 17 00:00:00 2001
From: Meiyim
Date: Tue, 16 Mar 2021 16:13:01 +0800
Subject: [PATCH 3/7] +add init

---
 paddle/fluid/operators/transpose_op_npu.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/paddle/fluid/operators/transpose_op_npu.cc b/paddle/fluid/operators/transpose_op_npu.cc
index 1a680097a9834d..cef1dcee8c4eda 100644
--- a/paddle/fluid/operators/transpose_op_npu.cc
+++ b/paddle/fluid/operators/transpose_op_npu.cc
@@ -29,10 +29,11 @@ class TransposeNPUKernel : public framework::OpKernel<T> {
     auto* out = ctx.Output<framework::LoDTensor>("Out");
     std::vector<int> axis = ctx.Attr<std::vector<int>>("axis");
     framework::NPUAttributeMap attr_input = {{"perm", axis}};
+    out->mutable_data<T>(ctx.device_context().GetPlace());
     auto runner = NpuOpRunner("TransposeD", {*x}, {*out}, attr_input);
     auto stream = ctx.template device_context<paddle::platform::NPUDeviceContext>().stream();
     runner.Run(stream);
-    //ctx.device_context().Wait();
+    //.Wait();
 
   }

From 053a734047e9d8d9182b15fbe44947a788046434 Mon Sep 17 00:00:00 2001
From: Meiyim
Date: Tue, 16 Mar 2021 16:20:46 +0800
Subject: [PATCH 4/7] cleanup

---
 .../fluid/operators/transpose_op_npu_test.cc  | 38 ++++++-----------
 .../unittests/npu/test_transpose_op_npu.py    | 23 ----------
 2 files changed, 16 insertions(+), 45 deletions(-)

diff --git a/paddle/fluid/operators/transpose_op_npu_test.cc b/paddle/fluid/operators/transpose_op_npu_test.cc
index 25f95f5c7780f2..c7a791956fbe56 100644
--- a/paddle/fluid/operators/transpose_op_npu_test.cc
+++ b/paddle/fluid/operators/transpose_op_npu_test.cc
@@ -45,16 +45,15 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
   auto* out_t = out->GetMutable<f::LoDTensor>();
   auto place = ctx.GetPlace();
 
-  int dim0=2;
-  int dim1=3;
-  TensorFromVector(std::vector<T>({0,1,2,3,4,5}), ctx, x_t);
+  int dim0 = 2;
+  int dim1 = 3;
+  TensorFromVector(std::vector<T>({0, 1, 2, 3, 4, 5}), ctx, x_t);
   ctx.Wait();
-  x_t->Resize({dim0, dim1});
+  x_t->Resize({dim0, dim1});
   out_t->Resize({dim0, dim1});
   ctx.Wait();
   out_t->mutable_data<T>(place);
   ctx.Wait();
-
   f::AttributeMap attrs = {
       {"axis", std::vector<int>({1, 0})},
{"data_format", std::string("AnyLayout")} @@ -63,10 +62,10 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) { {{"Out", {"Out"}}}, attrs); ctx.Wait(); op->Run(*scope, place); - ctx.Wait(); + ctx.Wait(); std::vector out_v; TensorToVector(*out_t, ctx, &out_v); - ctx.Wait(); + ctx.Wait(); EXPECT_EQ(out_t->numel(), dim0 * dim1); EXPECT_EQ(out_v[0], 0); @@ -90,34 +89,29 @@ void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx) { auto* x_t = x->GetMutable(); auto* out_grad_t = out_grad->GetMutable(); auto* out_t = out->GetMutable(); - int dim0=2; - int dim1=3; + int dim0 = 2; + int dim1 = 3; auto place = ctx.GetPlace(); - TensorFromVector(std::vector({0,1,2,3,4,5}), ctx, out_grad_t); - TensorFromVector(std::vector({0,1,2,3,4,5}), ctx, x_t); + TensorFromVector(std::vector({0, 1, 2, 3, 4, 5}), ctx, out_grad_t); + TensorFromVector(std::vector({0, 1, 2, 3, 4, 5}), ctx, x_t); ctx.Wait(); - x_grad_t->Resize({dim0, dim1}); - x_t->Resize({dim0, dim1}); + x_grad_t->Resize({dim0, dim1}); + x_t->Resize({dim0, dim1}); out_grad_t->Resize({dim0, dim1}); out_t->Resize({dim0, dim1}); - //out_grad_t->mutable_data(place); x_grad_t->mutable_data(place); out_t->mutable_data(place); ctx.Wait(); - f::AttributeMap attrs = { {"axis", std::vector({1, 0})}, {"data_format", std::string("AnyLayout")} }; - /* - {"mkldnn_data_type", "float32"}, - {"use_mkldnn", false}, - {"use_quantizer", false}, - */ - auto op = f::OpRegistry::CreateOp("transpose_grad", {{"Out@GRAD", {"Out@GRAD"}}, {"X", {"X"}}, {"Out", {"Out"}}}, - {{"X@GRAD", {"X@GRAD"}}}, attrs); + auto op = f::OpRegistry::CreateOp( + "transpose_grad", + {{"Out@GRAD", {"Out@GRAD"}}, {"X", {"X"}}, {"Out", {"Out"}}}, + {{"X@GRAD", {"X@GRAD"}}}, attrs); op->Run(*scope, place); ctx.Wait(); std::vector out_v; diff --git a/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py index 931b1942b18ef3..a3fd63a00f2fda 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py @@ -60,29 +60,6 @@ def init_axis(self): def test_check_output(self): self.check_output_with_place(self.place, check_dygraph=False) - # TODO(ascendrc): Test grad op after it is implemented. 
-    # def test_check_grad_normal(self):
-    #     self.check_grad_with_place(
-    #         self.place, ['X', 'Y'],
-    #         'Out',
-    #         max_relative_error=0.006,
-    #         check_dygraph=False)
-    #
-    # def test_check_grad_ingore_x(self):
-    #     self.check_grad_with_place(
-    #         self.place, ['Y'],
-    #         'Out',
-    #         no_grad_set=set("X"),
-    #         max_relative_error=0.006,
-    #         check_dygraph=False)
-    #
-    # def test_check_grad_ingore_y(self):
-    #     self.check_grad_with_place(
-    #         self.place, ['X'],
-    #         'Out',
-    #         no_grad_set=set("Y"),
-    #         max_relative_error=0.006, check_dygraph=False)
-
 
 if __name__ == '__main__':
     unittest.main()

From fd742d74373338442e23bb41958cecbb5bd4b53f Mon Sep 17 00:00:00 2001
From: Meiyim
Date: Wed, 17 Mar 2021 13:14:51 +0800
Subject: [PATCH 5/7] cleanup

---
 paddle/fluid/operators/transpose_op_npu.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/paddle/fluid/operators/transpose_op_npu.cc b/paddle/fluid/operators/transpose_op_npu.cc
index cef1dcee8c4eda..2d71bfdc725108 100644
--- a/paddle/fluid/operators/transpose_op_npu.cc
+++ b/paddle/fluid/operators/transpose_op_npu.cc
@@ -33,7 +33,6 @@ class TransposeNPUKernel : public framework::OpKernel<T> {
     auto runner = NpuOpRunner("TransposeD", {*x}, {*out}, attr_input);
     auto stream = ctx.template device_context<paddle::platform::NPUDeviceContext>().stream();
     runner.Run(stream);
-    //.Wait();
 
   }
 };

From 236335208e7501733c208db2804c9d1d4b97b519 Mon Sep 17 00:00:00 2001
From: Meiyim
Date: Wed, 17 Mar 2021 17:23:13 +0800
Subject: [PATCH 6/7] fp16 python unit test

---
 .../fluid/tests/unittests/npu/test_transpose_op_npu.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py
index a3fd63a00f2fda..9ee296875fb9b4 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py
@@ -61,5 +61,13 @@ def test_check_output(self):
         self.check_output_with_place(self.place, check_dygraph=False)
 
 
+@unittest.skipIf(not paddle.is_compiled_with_npu(),
+                 "core is not compiled with NPU")
+class TestTransposeOpFP16(OpTest):
+    no_need_check_grad = True
+    def init_dtype(self):
+        self.dtype = np.float16
+
+
 if __name__ == '__main__':
     unittest.main()

From 8ab8275fc4e91cf067a6181d5af90744058e679e Mon Sep 17 00:00:00 2001
From: Meiyim
Date: Wed, 17 Mar 2021 20:39:07 +0800
Subject: [PATCH 7/7] fix python unit test

---
 .../fluid/tests/unittests/npu/test_transpose_op_npu.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py
index 9ee296875fb9b4..797531a6c0f99e 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py
@@ -61,13 +61,14 @@ def test_check_output(self):
         self.check_output_with_place(self.place, check_dygraph=False)
 
-
 @unittest.skipIf(not paddle.is_compiled_with_npu(),
                  "core is not compiled with NPU")
-class TestTransposeOpFP16(OpTest):
-    no_need_check_grad = True
+class TestTransposeOpFP16(TestTransposeOp):
+    no_need_check_grad = True
+
     def init_dtype(self):
         self.dtype = np.float16
 
+
 if __name__ == '__main__':
     unittest.main()
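As a closing cross-check of the Python test's expectation: `np.transpose(self.x, [0, 2, 1, 3])` on an [8, 512, 12, 64] input swaps only axes 1 and 2, and the flat-index arithmetic generalizes the 2-D case used in the C++ test. A host-only sketch with a deliberately tiny stand-in shape (illustrative standard C++, unrelated to the Paddle API):

```cpp
#include <cassert>
#include <numeric>
#include <vector>

// Permute the axes of a row-major 4-D tensor: out[idx[perm]] = in[idx].
std::vector<float> Transpose4D(const std::vector<float>& in,
                               const std::vector<int>& dims,
                               const std::vector<int>& perm) {
  const std::vector<int> od = {dims[perm[0]], dims[perm[1]],
                               dims[perm[2]], dims[perm[3]]};
  std::vector<float> out(in.size());
  for (int a = 0; a < dims[0]; ++a)
    for (int b = 0; b < dims[1]; ++b)
      for (int c = 0; c < dims[2]; ++c)
        for (int d = 0; d < dims[3]; ++d) {
          const int idx[4] = {a, b, c, d};
          const int src = ((a * dims[1] + b) * dims[2] + c) * dims[3] + d;
          const int dst = ((idx[perm[0]] * od[1] + idx[perm[1]]) * od[2] +
                           idx[perm[2]]) * od[3] + idx[perm[3]];
          out[dst] = in[src];
        }
  return out;
}

int main() {
  const std::vector<int> dims = {2, 3, 2, 2};  // stand-in for {8, 512, 12, 64}
  std::vector<float> x(2 * 3 * 2 * 2);
  std::iota(x.begin(), x.end(), 0.0f);
  const auto y = Transpose4D(x, dims, {0, 2, 1, 3});
  // x at (0, 1, 0, 1) must land at y's (0, 0, 1, 1):
  // src = ((0*3+1)*2+0)*2+1 = 5, dst = ((0*2+0)*3+1)*2+1 = 3.
  assert(y[3] == x[5]);
  return 0;
}
```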