Skip to content
Merged
109 changes: 109 additions & 0 deletions paddle/fluid/operators/expand_v2_op_npu.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@

/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Plz check the copyright format.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

reformat the copyright done.


#include "paddle/fluid/operators/expand_v2_op.h"
#include <memory>
#include <string>
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The header includes on lines 14-15 are not needed.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed.

#include "paddle/fluid/operators/npu_op_runner.h"

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;

// NPU kernel for expand_v2: broadcasts X to a target shape via the CANN
// "ExpandD" operator. The target shape is taken from, in priority order:
// the "Shape" input tensor, the "expand_shapes_tensor" tensor list, or the
// "shape" attribute. A -1 entry in the target shape means "keep the
// corresponding input dimension" and is resolved here, because ExpandD
// itself only accepts positive dimensions.
template <typename DeviceContext, typename T>
class ExpandV2NPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* X = ctx.Input<framework::Tensor>("X");
    auto* Out = ctx.Output<framework::Tensor>("Out");

    std::vector<int> expand_shape;
    auto list_expand_shapes_tensor =
        ctx.MultiInput<framework::Tensor>("expand_shapes_tensor");
    if (ctx.HasInput("Shape")) {
      // Case 1: the full target shape is supplied as one int tensor.
      auto* shape_tensor = ctx.Input<framework::LoDTensor>("Shape");
      std::vector<int> out_data;
      TensorToVector(*shape_tensor, ctx.device_context(), &out_data);
      for (int i = 0; i < static_cast<int>(out_data.size()); ++i) {
        expand_shape.push_back(out_data[i]);
      }
    } else if (list_expand_shapes_tensor.size() > 0) {
      // Case 2: each target dimension is a separate 1-element tensor.
      for (size_t i = 0; i < list_expand_shapes_tensor.size(); ++i) {
        auto tensor = list_expand_shapes_tensor[i];
        std::vector<int> out_data;
        TensorToVector(*tensor, ctx.device_context(), &out_data);
        expand_shape.push_back(out_data[0]);
      }
    } else {
      // Case 3: the target shape is a static attribute.
      expand_shape = ctx.Attr<std::vector<int>>("shape");
    }

    auto rank = X->dims().size();
    PADDLE_ENFORCE_GE(
        rank, 1,
        platform::errors::InvalidArgument(
            "The rank of the input 'X' for expand_v2_npu op must be positive, "
            "but the value received is %d.",
            rank));
    PADDLE_ENFORCE_LE(
        rank, MAX_RANK_SUPPORTED,
        platform::errors::InvalidArgument(
            "The rank of the input 'X' for expand_v2_npu op must be less than "
            "or equal to %d, but the value received is %d.",
            MAX_RANK_SUPPORTED, rank));
    auto shape_size = expand_shape.size();
    PADDLE_ENFORCE_GE(
        shape_size, rank,
        platform::errors::InvalidArgument(
            "The number (%d) of elements of 'shape' for expand_v2_npu op must "
            "be "
            "greater than or equal to the rank (%d) of the input 'X'.",
            shape_size, rank));
    PADDLE_ENFORCE_LE(shape_size, MAX_RANK_SUPPORTED,
                      platform::errors::InvalidArgument(
                          "The number (%d) of elements of 'shape' for "
                          "expand_v2_npu op must be "
                          "less than or equal to %d.",
                          shape_size, MAX_RANK_SUPPORTED));

    // Resolve -1 entries: the input dims are right-aligned against the
    // target shape (the leading `diff` target dims are new dimensions),
    // so expand_shape[i] == -1 maps to in_dims[i - diff]. A -1 in one of
    // the new leading dimensions has no input dim to copy and is rejected
    // by the positivity check below.
    auto in_dims = X->dims();
    auto diff = shape_size - static_cast<size_t>(rank);
    for (size_t i = 0; i < shape_size; ++i) {
      if (i >= diff && expand_shape[i] == -1) {
        expand_shape[i] = static_cast<int>(in_dims[i - diff]);
      }
      PADDLE_ENFORCE_GT(
          expand_shape[i], 0,
          platform::errors::InvalidArgument(
              "The %uth element of 'shape' for expand_v2_npu op must be "
              "greater than 0, but the value given is %d.",
              i, expand_shape[i]));
    }

    // Build the attribute map only after -1 entries have been resolved,
    // since ExpandD requires concrete positive dimensions.
    framework::NPUAttributeMap attr_input = {{"shape", expand_shape}};

    framework::DDim out_dims = framework::make_ddim(expand_shape);
    Out->Resize(out_dims);
    Out->mutable_data<T>(ctx.GetPlace());

    const auto& runner = NpuOpRunner("ExpandD", {*X}, {*Out}, attr_input);
    auto stream =
        ctx.template device_context<paddle::platform::NPUDeviceContext>()
            .stream();
    runner.Run(stream);
  }
};
} // namespace operators
} // namespace paddle

namespace ops = paddle::operators;
// Register the NPU kernel for the float, float16 and int32 input dtypes.
REGISTER_OP_NPU_KERNEL(
    expand_v2,
    ops::ExpandV2NPUKernel<paddle::platform::NPUDeviceContext, float>,
    ops::ExpandV2NPUKernel<paddle::platform::NPUDeviceContext,
                           paddle::platform::float16>,
    ops::ExpandV2NPUKernel<paddle::platform::NPUDeviceContext, int>);
260 changes: 260 additions & 0 deletions python/paddle/fluid/tests/unittests/npu/test_expand_v2_op_npu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,260 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function
import unittest
import sys
import numpy as np
sys.path.append("..")
from op_test import OpTest
import paddle.fluid as fluid
from paddle.fluid import Program, program_guard
import paddle

paddle.enable_static()
np.random.seed(10)


# CANN Op Support X: float16, float32, int32, int8 ,uint8
# Situation 1: shape is a list(without tensor)
class TestExpandV2NPUOpRank1(OpTest):
    """expand_v2 on NPU where the target shape is a static list attribute."""

    def setUp(self):
        self.set_npu()
        self.place = paddle.NPUPlace(0)
        self.op_type = "expand_v2"
        self.dtype = np.float32
        self.init_data()

        x = np.random.random(self.ori_shape).astype(self.dtype)
        self.inputs = {'X': x}
        self.attrs = {'shape': self.shape}
        self.outputs = {'Out': np.tile(x, self.expand_times)}

    def set_npu(self):
        self.__class__.use_npu = True

    def init_data(self):
        # [100] -> [100]: identity expansion.
        self.ori_shape = [100]
        self.shape = [100]
        self.expand_times = [1]

    def test_check_output(self):
        self.check_output_with_place(self.place)

    # Gradient checking is not enabled for the NPU kernel yet.
    # def test_check_grad(self):
    #     self.check_grad(['X'], 'Out')


class TestExpandV2OpRank2_DimExpanding(TestExpandV2NPUOpRank1):
    # A new leading dimension is created: [120] -> [2, 120].
    def init_data(self):
        self.ori_shape = [120]
        self.expand_times = [2, 1]
        self.shape = [2, 120]


class TestExpandV2OpRank2(TestExpandV2NPUOpRank1):
    # A size-1 leading dimension is broadcast: [1, 140] -> [12, 140].
    def init_data(self):
        self.ori_shape = [1, 140]
        self.expand_times = [12, 1]
        self.shape = [12, 140]


class TestExpandV2OpRank3_Corner(TestExpandV2NPUOpRank1):
    # Corner case: rank-3 identity expansion (no dimension changes).
    def init_data(self):
        self.ori_shape = (2, 10, 5)
        self.expand_times = (1, 1, 1)
        self.shape = (2, 10, 5)


# CANN Op doesn't support shape = -1
# class TestExpandV2OpRank4(TestExpandV2NPUOpRank1):
# def init_data(self):
# self.ori_shape = (2, 4, 5, 7)
# self.shape = (-1, -1, -1, -1)
# self.expand_times = (1, 1, 1, 1)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please confirm how the CPU/CUDA code handles the self.shape = (-1, -1, -1, -1) case. Check the CPU compute logic in expand_v2_op.h — it should ultimately produce an out_dims with all-positive values — and then verify whether the same behavior can be implemented with the NPU ExpandD or BroadcastD operators.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added three unit-test cases where the shape contains -1; all of them pass.



# Situation 2: shape is a list(with tensor)
class TestExpandV2OpNPURank1_tensor_attr(OpTest):
    """expand_v2 on NPU where the shape comes from a list of 1-element tensors."""

    def setUp(self):
        self.set_npu()
        self.place = paddle.NPUPlace(0)
        self.op_type = "expand_v2"
        self.init_data()
        self.dtype = np.float32

        # One ("name", value) pair per target dimension; each value is a
        # 1-element int32 tensor holding that dimension.
        shape_tensors = [("x" + str(idx), np.ones((1)).astype('int32') * dim)
                         for idx, dim in enumerate(self.expand_shape)]

        self.inputs = {
            'X': np.random.random(self.ori_shape).astype(self.dtype),
            'expand_shapes_tensor': shape_tensors,
        }
        self.attrs = {"shape": self.infer_expand_shape}
        self.outputs = {'Out': np.tile(self.inputs['X'], self.expand_times)}

    def set_npu(self):
        self.__class__.use_npu = True

    def init_data(self):
        self.ori_shape = [100]
        self.expand_times = [1]
        self.expand_shape = [100]
        self.infer_expand_shape = [-1]

    def test_check_output(self):
        self.check_output_with_place(self.place)

    # Gradient checking is not enabled for the NPU kernel yet.
    # def test_check_grad(self):
    #     self.check_grad(['X'], 'Out')


class TestExpandV2OpRank2_Corner_tensor_attr(
        TestExpandV2OpNPURank1_tensor_attr):
    # Rank-2 identity expansion with the second dim inferred (-1).
    def init_data(self):
        self.ori_shape = [12, 14]
        self.expand_shape = [12, 14]
        self.expand_times = [1, 1]
        self.infer_expand_shape = [12, -1]


# Situation 3: shape is a tensor
class TestExpandV2NPUOpRank1_tensor(OpTest):
    """expand_v2 on NPU where the whole target shape is one int32 tensor."""

    def setUp(self):
        self.set_npu()
        self.place = paddle.NPUPlace(0)
        self.op_type = "expand_v2"
        self.init_data()
        self.dtype = np.float32

        x = np.random.random(self.ori_shape).astype(self.dtype)
        self.inputs = {
            'X': x,
            'Shape': np.array(self.expand_shape).astype("int32"),
        }
        self.attrs = {}
        self.outputs = {'Out': np.tile(x, self.expand_times)}

    def set_npu(self):
        self.__class__.use_npu = True

    def init_data(self):
        # [100] -> [2, 100]: a new leading dimension of size 2 is added.
        self.ori_shape = [100]
        self.expand_times = [2, 1]
        self.expand_shape = [2, 100]

    def test_check_output(self):
        self.check_output_with_place(self.place)

    # Gradient checking is not enabled for the NPU kernel yet.
    # def test_check_grad(self):
    #     self.check_grad(['X'], 'Out')


# Situation 4: input x is float16
class TestExpandV2OpFloat16(OpTest):
    """expand_v2 on NPU with a float16 input (identity expand).

    NOTE: this test was previously named ``TestExpandV2OpInteger``, the same
    name as the int32 test below, so the later class definition shadowed this
    one and the float16 case never ran. Renamed so both tests execute.
    """

    def setUp(self):
        self.set_npu()
        self.place = paddle.NPUPlace(0)
        self.op_type = "expand_v2"
        self.dtype = np.float16
        self.ori_shape = (2, 4, 5)
        self.inputs = {'X': np.random.random(self.ori_shape).astype(self.dtype)}
        self.attrs = {'shape': [2, 4, 5]}
        output = np.tile(self.inputs['X'], (1, 1, 1))
        self.outputs = {'Out': output}

    def set_npu(self):
        self.__class__.use_npu = True

    def test_check_output(self):
        self.check_output_with_place(self.place)


# Situation 5: input x is int32
class TestExpandV2OpInteger(OpTest):
    """expand_v2 on NPU with an int32 input (identity expand)."""

    def setUp(self):
        self.set_npu()
        self.place = paddle.NPUPlace(0)
        self.op_type = "expand_v2"
        x = np.random.randint(10, size=(2, 4, 5)).astype("int32")
        self.inputs = {'X': x}
        self.attrs = {'shape': [2, 4, 5]}
        self.outputs = {'Out': np.tile(x, (1, 1, 1))}

    def set_npu(self):
        self.__class__.use_npu = True

    def test_check_output(self):
        self.check_output_with_place(self.place)


class TestExpandV2Error(unittest.TestCase):
    # Error paths: paddle.tensor.expand is expected to reject unsupported
    # input variants with the exception types asserted below.
    def test_errors(self):
        with program_guard(Program(), Program()):
            # A raw LoDTensor (not a Variable) should raise TypeError.
            x1 = fluid.create_lod_tensor(
                np.array([[-1]]), [[1]], paddle.NPUPlace(0))
            shape = [2, 2]
            self.assertRaises(TypeError, paddle.tensor.expand, x1, shape)
            # A uint8 variable should raise TypeError.
            x2 = fluid.layers.data(name='x2', shape=[2], dtype="uint8")
            self.assertRaises(TypeError, paddle.tensor.expand, x2, shape)
            # A bool variable that requires gradients should raise ValueError.
            x3 = fluid.layers.data(name='x3', shape=[2], dtype="bool")
            x3.stop_gradient = False
            self.assertRaises(ValueError, paddle.tensor.expand, x3, shape)


# Test python API
# Test python API
class TestExpandV2API(unittest.TestCase):
    # Exercises paddle.expand through static graph execution on the NPU,
    # with the target shape given as a list, a list containing a Variable,
    # and a whole-shape tensor.
    def test_static(self):
        with fluid.program_guard(fluid.Program(), fluid.Program()):
            input = np.random.random([12, 14]).astype("float32")
            x = fluid.layers.data(
                name='x',
                shape=[12, 14],
                append_batch_size=False,
                dtype="float32")

            # First target dim supplied as a 1-element int32 Variable.
            positive_2 = fluid.layers.fill_constant([1], "int32", 12)
            expand_shape = fluid.layers.data(
                name="expand_shape",
                shape=[2],
                append_batch_size=False,
                dtype="int32")

            out_1 = paddle.expand(x, shape=[12, 14])
            out_2 = paddle.expand(x, shape=[positive_2, 14])
            out_3 = paddle.expand(x, shape=expand_shape)

            # g0 = fluid.backward.calc_gradient(out_2, x)

            exe = fluid.Executor(place=paddle.NPUPlace(0))
            res_1, res_2, res_3 = exe.run(fluid.default_main_program(),
                                          feed={
                                              "x": input,
                                              "expand_shape":
                                              np.array([12, 14]).astype("int32")
                                          },
                                          fetch_list=[out_1, out_2, out_3])

            # All three variants request the input's own shape, so each
            # result must equal the (unchanged) input.
            assert np.array_equal(res_1, np.tile(input, (1, 1)))
            assert np.array_equal(res_2, np.tile(input, (1, 1)))
            assert np.array_equal(res_3, np.tile(input, (1, 1)))


if __name__ == "__main__":
unittest.main()