From eefb6ca8cb1b7a8ad002e324729fd8916b5b7177 Mon Sep 17 00:00:00 2001 From: Aganlengzi Date: Wed, 29 Sep 2021 15:39:28 +0800 Subject: [PATCH 1/4] [NPU] modify transpose2 and index_select_grad kernels for model xlnet --- paddle/fluid/operators/index_select_op_npu.cc | 17 ++++++++------- paddle/fluid/operators/transpose_op_npu.cc | 21 ++++++++++++++----- 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/paddle/fluid/operators/index_select_op_npu.cc b/paddle/fluid/operators/index_select_op_npu.cc index b624d03cc85559..825229282f3dac 100644 --- a/paddle/fluid/operators/index_select_op_npu.cc +++ b/paddle/fluid/operators/index_select_op_npu.cc @@ -99,10 +99,11 @@ class IndexSelectGradNPUKernel : public framework::OpKernel { transed_out_dims[i] = out_dims[in_trans_perm[i]]; } transed_out_grad.mutable_data(transed_out_dims, ctx.GetPlace()); - framework::NPUAttributeMap in_trans_attr = {{"perm", in_trans_perm}}; - - const auto& in_trans_runner = NpuOpRunner( - "TransposeD", {*out_grad}, {transed_out_grad}, in_trans_attr); + NpuOpRunner in_trans_runner; + in_trans_runner.SetType("Transpose") + .AddInput(*out_grad) + .AddInput(std::move(in_trans_perm)) + .AddOutput(transed_out_grad); in_trans_runner.Run(stream); Tensor sum_out; @@ -133,10 +134,12 @@ class IndexSelectGradNPUKernel : public framework::OpKernel { for (int i = 1 + dim; i < x_dims.size(); ++i) { out_trans_perm.push_back(i); } - framework::NPUAttributeMap out_trans_attr = {{"perm", out_trans_perm}}; x_grad->mutable_data(ctx.GetPlace()); - const auto& out_trans_runner = - NpuOpRunner("TransposeD", {sum_out}, {*x_grad}, out_trans_attr); + NpuOpRunner out_trans_runner; + out_trans_runner.SetType("Transpose") + .AddInput(sum_out) + .AddInput(std::move(out_trans_perm)) + .AddOutput(*x_grad); out_trans_runner.Run(stream); } } diff --git a/paddle/fluid/operators/transpose_op_npu.cc b/paddle/fluid/operators/transpose_op_npu.cc index 035ad5f3f314aa..7cc68e93c5d620 100644 --- a/paddle/fluid/operators/transpose_op_npu.cc +++ b/paddle/fluid/operators/transpose_op_npu.cc @@ -27,9 +27,12 @@ class TransposeNPUKernel : public framework::OpKernel { auto* x = ctx.Input("X"); auto* out = ctx.Output("Out"); std::vector axis = ctx.Attr>("axis"); - framework::NPUAttributeMap attr_input = {{"perm", axis}}; out->mutable_data(ctx.device_context().GetPlace()); - const auto& runner = NpuOpRunner("TransposeD", {*x}, {*out}, attr_input); + NpuOpRunner runner; + runner.SetType("Transpose") + .AddInput(*x) + .AddInput(std::move(axis)) + .AddOutput(*out); auto stream = ctx.template device_context() .stream(); @@ -51,9 +54,11 @@ class TransposeGradNPUKernel : public framework::OpKernel { reversed_axis[axis[i]] = i; } x_grad->mutable_data(ctx.GetPlace()); - framework::NPUAttributeMap attr_input = {{"perm", reversed_axis}}; - const auto& runner = - NpuOpRunner("TransposeD", {*out_grad}, {*x_grad}, attr_input); + NpuOpRunner runner; + runner.SetType("Transpose") + .AddInput(*out_grad) + .AddInput(std::move(reversed_axis)) + .AddOutput(*x_grad); auto stream = ctx.template device_context() .stream(); @@ -72,11 +77,17 @@ REGISTER_OP_NPU_KERNEL( ops::TransposeNPUKernel, ops::TransposeNPUKernel, +#ifdef PADDLE_WITH_ASCEND_INT64 + ops::TransposeNPUKernel, +#endif ops::TransposeNPUKernel, ops::TransposeNPUKernel); REGISTER_OP_NPU_KERNEL(transpose2_grad, ops::TransposeGradNPUKernel, ops::TransposeGradNPUKernel, ops::TransposeGradNPUKernel, +#ifdef PADDLE_WITH_ASCEND_INT64 + ops::TransposeGradNPUKernel, +#endif ops::TransposeGradNPUKernel, ops::TransposeGradNPUKernel); From 7f7199bfc0a05b279905911e6f5eebe4733a4cec Mon Sep 17 00:00:00 2001 From: Aganlengzi Date: Wed, 29 Sep 2021 19:55:06 +0800 Subject: [PATCH 2/4] add transpose2 int64_t unit test --- .../fluid/tests/unittests/npu/test_transpose_op_npu.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py index e95f3cc83cfb31..c49293d87badc2 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py @@ -66,5 +66,12 @@ def init_dtype(self): self.dtype = np.float16 +class TestTransposeOpInt64(TestTransposeOp): + no_need_check_grad = True + + def init_dtype(self): + self.dtype = np.int64 + + if __name__ == '__main__': unittest.main() From 20a1049f72ddbd18c32a1a7662d76128d66a205f Mon Sep 17 00:00:00 2001 From: Aganlengzi Date: Thu, 30 Sep 2021 02:02:51 +0800 Subject: [PATCH 3/4] add more transpose2 unit tests --- .../unittests/npu/test_transpose_op_npu.py | 96 +++++++++++++++---- 1 file changed, 79 insertions(+), 17 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py index c49293d87badc2..af69b33a91bd4c 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py @@ -31,47 +31,109 @@ def setUp(self): self.op_type = "transpose2" self.place = paddle.NPUPlace(0) self.init_dtype() - self.init_input_output() - self.init_kernel_type() - self.init_axis() + self.init_shape_axis() + self.x = np.random.uniform(0.1, 1, self.shape).astype(self.dtype) self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(self.x)} - self.attrs = {'axis': [0, 2, 1, 3], 'data_format': 'AnyLayout'} + self.attrs = {'axis': self.axis, 'data_format': 'AnyLayout'} + self.out = np.transpose(self.x, self.axis) self.outputs = {'Out': self.out} def set_npu(self): self.__class__.use_npu = True - def init_kernel_type(self): - self.use_mkldnn = False - - def init_input_output(self): - self.x = np.random.uniform(0.1, 1, [8, 512, 12, 64]).astype(self.dtype) - self.out = np.transpose(self.x, [0, 2, 1, 3]) - def init_dtype(self): self.dtype = np.float32 - def init_axis(self): - self.axis = -1 + def init_shape_axis(self): + self.shape = (3, 40) + self.axis = (1, 0) def test_check_output(self): self.check_output_with_place(self.place) + def test_check_grad(self): + if self.dtype == np.int64: + return + self.check_grad_with_place(self.place, ['X'], 'Out') -class TestTransposeOpFP16(TestTransposeOp): - no_need_check_grad = True +class TestCase0(TestTransposeOp): + def init_shape_axis(self): + self.shape = (100, ) + self.axis = (0, ) + + +class TestCase1(TestTransposeOp): + def init_shape_axis(self): + self.shape = (3, 4, 10) + self.axis = (0, 2, 1) + + +class TestCase2(TestTransposeOp): + def init_shape_axis(self): + self.shape = (2, 3, 4, 5) + self.axis = (0, 2, 3, 1) + + +class TestCase3(TestTransposeOp): + def init_shape_axis(self): + self.shape = (2, 3, 4, 5, 6) + self.axis = (4, 2, 3, 1, 0) + + +class TestCase4(TestTransposeOp): + def init_shape_axis(self): + self.shape = (2, 3, 4, 5, 6, 1) + self.axis = (4, 2, 3, 1, 0, 5) + + +class TestCase5(TestTransposeOp): + def init_shape_axis(self): + self.shape = (2, 16, 96) + self.axis = (0, 2, 1) + + +class TestCase6(TestTransposeOp): + def init_shape_axis(self): + self.shape = (2, 10, 12, 16) + self.axis = (3, 1, 2, 0) + + +class TestCase7(TestTransposeOp): + def init_shape_axis(self): + self.shape = (2, 10, 2, 16) + self.axis = (0, 1, 3, 2) + + +class TestCase8(TestTransposeOp): + def init_shape_axis(self): + self.shape = (2, 3, 2, 3, 2, 4, 3, 3) + self.axis = (0, 1, 3, 2, 4, 5, 6, 7) + + +class TestCase9(TestTransposeOp): + def init_shape_axis(self): + self.shape = (2, 3, 2, 3, 2, 4, 3, 3) + self.axis = (6, 1, 3, 5, 0, 2, 4, 7) + + +class TestTransposeOpFP16(TestTransposeOp): def init_dtype(self): self.dtype = np.float16 + def test_check_grad(self): + self.check_grad_with_place( + self.place, ['X'], 'Out', max_relative_error=1e-2) -class TestTransposeOpInt64(TestTransposeOp): - no_need_check_grad = True +class TestTransposeOpInt64(TestTransposeOp): def init_dtype(self): self.dtype = np.int64 + def test_check_grad(self): + pass + if __name__ == '__main__': unittest.main() From 9905ae36f0bb9bec5f91b1a8d7baed75739e0ae0 Mon Sep 17 00:00:00 2001 From: Aganlengzi Date: Thu, 30 Sep 2021 11:24:18 +0800 Subject: [PATCH 4/4] update test_transpose_op_npu.py --- .../tests/unittests/npu/test_transpose_op_npu.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py index af69b33a91bd4c..b1a6bfcdaaadca 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_transpose_op_npu.py @@ -33,11 +33,9 @@ def setUp(self): self.init_dtype() self.init_shape_axis() - self.x = np.random.uniform(0.1, 1, self.shape).astype(self.dtype) - self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(self.x)} + self.inputs = {'X': np.random.random(self.shape).astype(self.dtype)} self.attrs = {'axis': self.axis, 'data_format': 'AnyLayout'} - self.out = np.transpose(self.x, self.axis) - self.outputs = {'Out': self.out} + self.outputs = {'Out': self.inputs['X'].transpose(self.axis)} def set_npu(self): self.__class__.use_npu = True @@ -53,8 +51,6 @@ def test_check_output(self): self.check_output_with_place(self.place) def test_check_grad(self): - if self.dtype == np.int64: - return self.check_grad_with_place(self.place, ['X'], 'Out') @@ -123,8 +119,7 @@ def init_dtype(self): self.dtype = np.float16 def test_check_grad(self): - self.check_grad_with_place( - self.place, ['X'], 'Out', max_relative_error=1e-2) + pass class TestTransposeOpInt64(TestTransposeOp):