-
Notifications
You must be signed in to change notification settings - Fork 6k
[NPU] Support npu op expand_v2 and expand_v2_grad #34764
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
41da93c
e79e46c
a055ba6
4d80f3d
7aadfc9
32097e8
a65895c
40929fe
ef6088a
1771908
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,109 @@ | ||
|
|
||
| /* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. | ||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||
| you may not use this file except in compliance with the License. | ||
| You may obtain a copy of the License at | ||
| http://www.apache.org/licenses/LICENSE-2.0 | ||
| Unless required by applicable law or agreed to in writing, software | ||
| distributed under the License is distributed on an "AS IS" BASIS, | ||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| See the License for the specific language governing permissions and | ||
| limitations under the License. */ | ||
|
|
||
| #include "paddle/fluid/operators/expand_v2_op.h" | ||
| #include <memory> | ||
| #include <string> | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 14-15两行头文件不需要
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 已删除 |
||
| #include "paddle/fluid/operators/npu_op_runner.h" | ||
|
|
||
| namespace paddle { | ||
| namespace operators { | ||
|
|
||
| using Tensor = framework::Tensor; | ||
| template <typename DeviceContext, typename T> | ||
| class ExpandV2NPUKernel : public framework::OpKernel<T> { | ||
| public: | ||
| void Compute(const framework::ExecutionContext& ctx) const override { | ||
| auto* X = ctx.Input<framework::Tensor>("X"); | ||
| auto* Out = ctx.Output<framework::Tensor>("Out"); | ||
|
|
||
| std::vector<int> expand_shape; | ||
| auto list_expand_shapes_tensor = | ||
| ctx.MultiInput<framework::Tensor>("expand_shapes_tensor"); | ||
| if (ctx.HasInput("Shape")) { | ||
| auto* shape_tensor = ctx.Input<framework::LoDTensor>("Shape"); | ||
| std::vector<int> out_data; | ||
| TensorToVector(*shape_tensor, ctx.device_context(), &out_data); | ||
| for (int i = 0; i < static_cast<int>(out_data.size()); ++i) { | ||
| expand_shape.push_back(out_data[i]); | ||
| } | ||
| } else if (list_expand_shapes_tensor.size() > 0) { | ||
| // get tensor from | ||
| for (size_t i = 0; i < list_expand_shapes_tensor.size(); ++i) { | ||
| auto tensor = list_expand_shapes_tensor[i]; | ||
| std::vector<int> out_data; | ||
| TensorToVector(*tensor, ctx.device_context(), &out_data); | ||
| expand_shape.push_back(out_data[0]); | ||
| } | ||
| } else { | ||
| expand_shape = ctx.Attr<std::vector<int>>("shape"); | ||
| } | ||
|
|
||
| framework::NPUAttributeMap attr_input = {{"shape", expand_shape}}; | ||
|
|
||
| auto rank = X->dims().size(); | ||
| for (size_t i = 0; i < expand_shape.size(); ++i) { | ||
| PADDLE_ENFORCE_GT( | ||
| expand_shape[i], 0, | ||
| platform::errors::InvalidArgument( | ||
| "The %uth element of 'shape' for expand_v2_npu op must be " | ||
| "greater than 0, but the value given is %d.", | ||
| i, expand_shape[i])); | ||
| } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 需要结合TestExpandV2OpRank4进行代码调整。
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. expand_v2_op_npu.cc中添加对expand_shape的再构造,考虑三种情况 1) expand_shape比input_dim维度要大的情况 2) expand_shape[i]维度大于0的情况 3) expand_shape[i]等于-1的情况 |
||
| PADDLE_ENFORCE_GE( | ||
| rank, 1, | ||
| platform::errors::InvalidArgument( | ||
| "The rank of the input 'X' for expand_v2_npu op must be positive, " | ||
| "but the value received is %d.", | ||
| rank)); | ||
| PADDLE_ENFORCE_LE( | ||
| rank, MAX_RANK_SUPPORTED, | ||
| platform::errors::InvalidArgument( | ||
| "The rank of the input 'X' for expand_v2_npu op must be less than " | ||
| "or equal to %d, but the value received is %d.", | ||
| MAX_RANK_SUPPORTED, rank)); | ||
| auto shape_size = expand_shape.size(); | ||
| PADDLE_ENFORCE_GE( | ||
| shape_size, rank, | ||
| platform::errors::InvalidArgument( | ||
| "The number (%d) of elements of 'shape' for expand_v2_npu op must " | ||
| "be " | ||
| "greater than or equal to the rank (%d) of the input 'X'.", | ||
| shape_size, rank)); | ||
| PADDLE_ENFORCE_LE(shape_size, MAX_RANK_SUPPORTED, | ||
| platform::errors::InvalidArgument( | ||
| "The number (%d) of elements of 'shape' for " | ||
| "expand_v2_npu op must be " | ||
| "less than or equal to %d.", | ||
| shape_size, MAX_RANK_SUPPORTED)); | ||
|
|
||
| framework::DDim out_dims = framework::make_ddim(expand_shape); | ||
| Out->Resize(out_dims); | ||
| Out->mutable_data<T>(ctx.GetPlace()); | ||
|
|
||
| const auto& runner = NpuOpRunner("ExpandD", {*X}, {*Out}, attr_input); | ||
| auto stream = | ||
| ctx.template device_context<paddle::platform::NPUDeviceContext>() | ||
| .stream(); | ||
| runner.Run(stream); | ||
| } | ||
| }; | ||
| } // namespace operators | ||
| } // namespace paddle | ||
|
|
||
| namespace ops = paddle::operators; | ||
| REGISTER_OP_NPU_KERNEL( | ||
| expand_v2, | ||
| ops::ExpandV2NPUKernel<paddle::platform::NPUDeviceContext, float>, | ||
| ops::ExpandV2NPUKernel<paddle::platform::NPUDeviceContext, | ||
| paddle::platform::float16>, | ||
| ops::ExpandV2NPUKernel<paddle::platform::NPUDeviceContext, int>); | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,260 @@ | ||
| # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| from __future__ import print_function | ||
| import unittest | ||
| import sys | ||
| import numpy as np | ||
| sys.path.append("..") | ||
| from op_test import OpTest | ||
| import paddle.fluid as fluid | ||
| from paddle.fluid import Program, program_guard | ||
| import paddle | ||
|
|
||
| paddle.enable_static() | ||
| np.random.seed(10) | ||
|
|
||
|
|
||
# CANN Op Support X: float16, float32, int32, int8, uint8
# Situation 1: shape is a list(without tensor)
class TestExpandV2NPUOpRank1(OpTest):
    def setUp(self):
        """Expand with the target shape passed as a plain attribute list."""
        self.set_npu()
        self.place = paddle.NPUPlace(0)
        self.op_type = "expand_v2"
        self.dtype = np.float32
        self.init_data()

        x = np.random.random(self.ori_shape).astype(self.dtype)
        self.inputs = {'X': x}
        self.attrs = {'shape': self.shape}
        self.outputs = {'Out': np.tile(x, self.expand_times)}

    def set_npu(self):
        # Mark the class so OpTest executes this case on the NPU device.
        self.__class__.use_npu = True

    def init_data(self):
        # Identity expand: the target shape equals the input shape.
        self.ori_shape = [100]
        self.shape = [100]
        self.expand_times = [1]

    def test_check_output(self):
        self.check_output_with_place(self.place)

    # def test_check_grad(self):
    #     self.check_grad(['X'], 'Out')
|
|
||
|
|
||
class TestExpandV2OpRank2_DimExpanding(TestExpandV2NPUOpRank1):
    def init_data(self):
        """Expand a 1-D input by prepending a new leading dimension."""
        self.expand_times = [2, 1]
        self.ori_shape = [120]
        self.shape = [2, 120]
|
|
||
|
|
||
class TestExpandV2OpRank2(TestExpandV2NPUOpRank1):
    def init_data(self):
        """Broadcast a size-1 leading dimension up to 12."""
        self.expand_times = [12, 1]
        self.ori_shape = [1, 140]
        self.shape = [12, 140]
|
|
||
|
|
||
class TestExpandV2OpRank3_Corner(TestExpandV2NPUOpRank1):
    def init_data(self):
        """Corner case: rank-3 identity expand (no actual broadcasting)."""
        self.expand_times = (1, 1, 1)
        self.ori_shape = (2, 10, 5)
        self.shape = (2, 10, 5)
|
|
||
|
|
||
| # CANN Op doesn't support shape = -1 | ||
| # class TestExpandV2OpRank4(TestExpandV2NPUOpRank1): | ||
| # def init_data(self): | ||
| # self.ori_shape = (2, 4, 5, 7) | ||
| # self.shape = (-1, -1, -1, -1) | ||
| # self.expand_times = (1, 1, 1, 1) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 确认下，在CPU/CUDA代码中，对于self.shape = (-1, -1, -1, -1)的情况时如何处理的？需要看下expand_v2_op.h的CPU计算逻辑，应该最后可以得到一个正值的out_dims的结果，之后看下是否可以通过NPU的ExpandD或者BroadcastD等算子进行实现。
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 已添加三个shape中有-1情况的单测用例,单测已通过 |
||
|
|
||
|
|
||
# Situation 2: shape is a list(with tensor)
class TestExpandV2OpNPURank1_tensor_attr(OpTest):
    def setUp(self):
        """Target shape mixes -1 placeholders (attr) with 1-element tensors."""
        self.set_npu()
        self.place = paddle.NPUPlace(0)
        self.op_type = "expand_v2"
        self.init_data()
        self.dtype = np.float32
        # Build one named, 1-element int32 tensor per target dimension.
        shape_tensors = [("x" + str(idx), np.ones((1)).astype('int32') * dim)
                         for idx, dim in enumerate(self.expand_shape)]

        x = np.random.random(self.ori_shape).astype(self.dtype)
        self.inputs = {
            'X': x,
            'expand_shapes_tensor': shape_tensors,
        }
        # The attribute carries -1 placeholders; real sizes come from tensors.
        self.attrs = {"shape": self.infer_expand_shape}
        self.outputs = {'Out': np.tile(x, self.expand_times)}

    def set_npu(self):
        # Mark the class so OpTest executes this case on the NPU device.
        self.__class__.use_npu = True

    def init_data(self):
        # 1-D identity expand; the only dimension is inferred from a tensor.
        self.ori_shape = [100]
        self.expand_times = [1]
        self.expand_shape = [100]
        self.infer_expand_shape = [-1]

    def test_check_output(self):
        self.check_output_with_place(self.place)

    # def test_check_grad(self):
    #     self.check_grad(['X'], 'Out')
|
|
||
|
|
||
class TestExpandV2OpRank2_Corner_tensor_attr(
        TestExpandV2OpNPURank1_tensor_attr):
    def init_data(self):
        """Rank-2 identity expand; the second dim is inferred from a tensor."""
        self.expand_times = [1, 1]
        self.ori_shape = [12, 14]
        self.expand_shape = [12, 14]
        self.infer_expand_shape = [12, -1]
|
|
||
|
|
||
# Situation 3: shape is a tensor
class TestExpandV2NPUOpRank1_tensor(OpTest):
    def setUp(self):
        """The whole target shape is supplied through the 'Shape' input."""
        self.set_npu()
        self.place = paddle.NPUPlace(0)
        self.op_type = "expand_v2"
        self.init_data()
        self.dtype = np.float32

        x = np.random.random(self.ori_shape).astype(self.dtype)
        self.inputs = {
            'X': x,
            'Shape': np.array(self.expand_shape).astype("int32"),
        }
        self.attrs = {}
        self.outputs = {'Out': np.tile(x, self.expand_times)}

    def set_npu(self):
        # Mark the class so OpTest executes this case on the NPU device.
        self.__class__.use_npu = True

    def init_data(self):
        # Prepend a new leading dimension of size 2.
        self.ori_shape = [100]
        self.expand_times = [2, 1]
        self.expand_shape = [2, 100]

    def test_check_output(self):
        self.check_output_with_place(self.place)

    # def test_check_grad(self):
    #     self.check_grad(['X'], 'Out')
|
|
||
|
|
||
# Situation 4: input x is float16
# NOTE: this class was originally also named TestExpandV2OpInteger, the same
# as the int32 test below; Python kept only the later definition, so the
# float16 case silently never ran. Renamed so both tests are discovered.
class TestExpandV2OpFloat16(OpTest):
    def setUp(self):
        """Identity expand on a float16 input."""
        self.set_npu()
        self.place = paddle.NPUPlace(0)
        self.op_type = "expand_v2"
        self.dtype = np.float16
        self.ori_shape = (2, 4, 5)
        self.inputs = {'X': np.random.random(self.ori_shape).astype(self.dtype)}
        self.attrs = {'shape': [2, 4, 5]}
        output = np.tile(self.inputs['X'], (1, 1, 1))
        self.outputs = {'Out': output}

    def set_npu(self):
        # Mark the class so OpTest executes this case on the NPU device.
        self.__class__.use_npu = True

    def test_check_output(self):
        self.check_output_with_place(self.place)
|
|
||
|
|
||
# Situation 5: input x is int32
class TestExpandV2OpInteger(OpTest):
    def setUp(self):
        """Identity expand on an int32 input drawn from randint."""
        self.set_npu()
        self.place = paddle.NPUPlace(0)
        self.op_type = "expand_v2"
        x = np.random.randint(10, size=(2, 4, 5)).astype("int32")
        self.inputs = {'X': x}
        self.attrs = {'shape': [2, 4, 5]}
        self.outputs = {'Out': np.tile(x, (1, 1, 1))}

    def set_npu(self):
        # Mark the class so OpTest executes this case on the NPU device.
        self.__class__.use_npu = True

    def test_check_output(self):
        self.check_output_with_place(self.place)
|
|
||
|
|
||
class TestExpandV2Error(unittest.TestCase):
    def test_errors(self):
        """paddle.expand must reject LoDTensor inputs and uint8/bool dtypes."""
        with program_guard(Program(), Program()):
            target_shape = [2, 2]
            lod_input = fluid.create_lod_tensor(
                np.array([[-1]]), [[1]], paddle.NPUPlace(0))
            self.assertRaises(TypeError, paddle.tensor.expand, lod_input,
                              target_shape)
            uint8_var = fluid.layers.data(name='x2', shape=[2], dtype="uint8")
            self.assertRaises(TypeError, paddle.tensor.expand, uint8_var,
                              target_shape)
            bool_var = fluid.layers.data(name='x3', shape=[2], dtype="bool")
            bool_var.stop_gradient = False
            self.assertRaises(ValueError, paddle.tensor.expand, bool_var,
                              target_shape)
|
|
||
|
|
||
# Test python API
class TestExpandV2API(unittest.TestCase):
    def test_static(self):
        """paddle.expand in static-graph mode: literal, tensor-element and
        whole-tensor target shapes must all behave like an identity tile."""
        with fluid.program_guard(fluid.Program(), fluid.Program()):
            x_np = np.random.random([12, 14]).astype("float32")
            x = fluid.layers.data(
                name='x',
                shape=[12, 14],
                append_batch_size=False,
                dtype="float32")

            positive_2 = fluid.layers.fill_constant([1], "int32", 12)
            expand_shape = fluid.layers.data(
                name="expand_shape",
                shape=[2],
                append_batch_size=False,
                dtype="int32")

            # Three ways of supplying the target shape.
            out_1 = paddle.expand(x, shape=[12, 14])
            out_2 = paddle.expand(x, shape=[positive_2, 14])
            out_3 = paddle.expand(x, shape=expand_shape)

            # g0 = fluid.backward.calc_gradient(out_2, x)

            exe = fluid.Executor(place=paddle.NPUPlace(0))
            res_1, res_2, res_3 = exe.run(
                fluid.default_main_program(),
                feed={
                    "x": x_np,
                    "expand_shape": np.array([12, 14]).astype("int32")
                },
                fetch_list=[out_1, out_2, out_3])

            assert np.array_equal(res_1, np.tile(x_np, (1, 1)))
            assert np.array_equal(res_2, np.tile(x_np, (1, 1)))
            assert np.array_equal(res_3, np.tile(x_np, (1, 1)))
|
|
||
|
|
||
| # Entry point: allow running this test file directly as a script. | ||
| if __name__ == "__main__": | ||
| unittest.main() | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Plz check the copyright format.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
reformat the copyright done.