Commit 71ead93

[INTEL_HPU] Add support of paddle.tile (PaddlePaddle#1691)
1 parent de85179 commit 71ead93

2 files changed, +391 -0 lines changed
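For context, paddle.tile repeats a tensor along each dimension according to repeat_times. Below is a minimal dynamic-mode usage sketch of what this commit enables (illustrative only, not part of the diff; it assumes the intel_hpu plugin build is installed, and the "intel_hpu:0" device string mirrors the paddle.CustomPlace("intel_hpu", 0) used by the tests further down):

```python
# Illustrative only: tile a 2x2 tensor on the intel_hpu plugin device.
import paddle

paddle.set_device("intel_hpu:0")  # assumes the intel_hpu plugin is installed

x = paddle.to_tensor([[1.0, 2.0], [3.0, 4.0]])
y = paddle.tile(x, repeat_times=[2, 3])  # repeats rows 2x, columns 3x
print(y.shape)  # [4, 6]
```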
Lines changed: 122 additions & 0 deletions
@@ -0,0 +1,122 @@
```cpp
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "habanalabs/perf_lib_layer_params.h"
#include "kernels/funcs.h"
#include "kernels/hpu_operator.h"
#include "paddle/extension.h"
#include "utils/utils.h"

namespace custom_kernel {

// Wraps the synapse "repeat" op, which implements tile semantics.
class Tile : public HpuOperator {
 public:
  Tile() : HpuOperator("repeat_pt_fwd_", false) {}

  void AddNode(ConvertTensors& ct, ns_RepeatPt::Params& params) {
    auto inputs = ct.GetTensors();
    auto outputs = ct.GetTensors(false);

    auto x_tensor = createTensor(inputs[0].dims.size(),
                                 inputs[0].type,
                                 inputs[0].dims,
                                 true,
                                 inputs[0].name);

    auto out_tensor = createTensor(outputs[0].dims.size(),
                                   outputs[0].type,
                                   outputs[0].dims,
                                   true,
                                   outputs[0].name);

    synTensor ins[] = {x_tensor};
    synTensor outs[] = {out_tensor};
    std::string guid = guid_ + SynDataTypeToStr(inputs[0].type);
    synStatus status = synNodeCreate(graphHandle_,
                                     ins,
                                     outs,
                                     1,
                                     1,
                                     &params,
                                     sizeof(params),
                                     guid.c_str(),
                                     guid.c_str(),
                                     nullptr,
                                     nullptr);
    PD_CHECK(
        status == synSuccess, "[RUNTIME] synNodeCreate tile failed = ", status);
  }
};

template <typename T, typename Context>
void TileKernel(const Context& dev_ctx,
                const phi::DenseTensor& x,
                const phi::IntArray& repeat_times,
                phi::DenseTensor* out) {
  VLOG(6) << "call HPU TileKernel";

  dev_ctx.template Alloc<T>(out);

  // The synapse params hold a fixed-size repeat array; reject repeat_times
  // that would overflow it.
  ns_RepeatPt::Params params;
  int max_repeat_times = sizeof(params.repeat) / sizeof(params.repeat[0]);
  int repeat_times_size = repeat_times.size();
  PADDLE_ENFORCE_GE(
      max_repeat_times,
      repeat_times_size,
      phi::errors::ResourceExhausted("unsupported repeat_times size."));

  for (int i = 0; i < repeat_times_size; i++) {
    params.repeat[i] = repeat_times[i];
  }
  params.size = repeat_times_size;

  ConvertTensors ct;
  ct.Add(x);
  ct.Add(out, false);
  std::vector<DIMS> inputs_dims = ct.GetDims();

  // Reuse a previously compiled recipe for this dtype/shape/params
  // combination if one is cached; otherwise build, compile, and cache it.
  std::string guid_prefix = "TileKernel";
  OpCacheOperator op_info;
  op_info.prepareOpInfo<T, ns_RepeatPt::Params>(
      guid_prefix, inputs_dims, &params);
  auto recipe = op_info.GetRecipe();

  if (recipe == nullptr) {
    Tile op;

    op.AddNode(ct, params);
    op.Compile();
    op_info.setOp(op);

    recipe = op_info.GetRecipe();
  }

  std::map<std::string, uint64_t> tensors = ct.GetDeviceAddr();
  RecipeRunner runner(recipe);
  runner.Run(reinterpret_cast<C_Stream>(dev_ctx.stream()), tensors);
}

}  // namespace custom_kernel

PD_REGISTER_PLUGIN_KERNEL(tile,
                          intel_hpu,
                          ALL_LAYOUT,
                          custom_kernel::TileKernel,
                          bool,
                          float,
                          double,
                          int,
                          int64_t,
                          phi::dtype::float16,
                          phi::dtype::bfloat16) {}
```
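One semantic corner worth calling out before the tests: when len(repeat_times) exceeds the input rank, tile first promotes the input by prepending size-1 dimensions, which is the case TestTileOpRank2Expanding below exercises. A short numpy sketch of that reference behavior (np.tile is exactly what the tests compare the HPU output against):

```python
import numpy as np

x = np.random.random(120).astype("float32")  # rank-1 input, as in TestTileOpRank2Expanding
out = np.tile(x, (2, 2))                     # len(repeat_times) = 2 > x.ndim = 1
print(out.shape)                             # (2, 240): x is first viewed as shape (1, 120)
```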
Lines changed: 269 additions & 0 deletions
@@ -0,0 +1,269 @@
```python
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import unittest

import numpy as np

from tests.op_test import OpTest
import paddle

paddle.enable_static()
np.random.seed(10)

intel_hpus_module_id = os.environ.get("FLAGS_selected_intel_hpus", 0)


# repeat_times is a list (without tensor)
class TestTileOpRank1(OpTest):
    def setUp(self):
        self.set_intel_hpu()
        self.place = paddle.CustomPlace("intel_hpu", int(intel_hpus_module_id))
        self.op_type = "tile"
        self.python_api = paddle.tile
        self.init_data()

        self.inputs = {"X": np.random.random(self.ori_shape).astype("float32")}
        self.attrs = {"repeat_times": self.repeat_times}
        output = np.tile(self.inputs["X"], self.repeat_times)
        self.outputs = {"Out": output}

    def set_intel_hpu(self):
        self.__class__.use_custom_device = True
        self.__class__.no_need_check_grad = True

    def init_data(self):
        self.ori_shape = [100]
        self.repeat_times = [2]

    def test_check_output(self):
        self.check_output_with_place(self.place)


# with dimension expanding
class TestTileOpRank2Expanding(TestTileOpRank1):
    def init_data(self):
        self.ori_shape = [120]
        self.repeat_times = [2, 2]


class TestTileOpRank2(TestTileOpRank1):
    def init_data(self):
        self.ori_shape = [12, 14]
        self.repeat_times = [2, 3]


class TestTileOpRank3_Corner(TestTileOpRank1):
    def init_data(self):
        self.ori_shape = (2, 10, 5)
        self.repeat_times = (1, 1, 1)


class TestTileOpRank3_Corner2(TestTileOpRank1):
    def init_data(self):
        self.ori_shape = (2, 10, 5)
        self.repeat_times = (2, 2)


class TestTileOpRank3(TestTileOpRank1):
    def init_data(self):
        self.ori_shape = (2, 4, 15)
        self.repeat_times = (2, 1, 4)


class TestTileOpRank4(TestTileOpRank1):
    def init_data(self):
        self.ori_shape = (2, 4, 5, 7)
        self.repeat_times = (3, 2, 1, 2)


class TestTileOpRank_ZeroDim1(TestTileOpRank1):
    def init_data(self):
        self.ori_shape = []
        self.repeat_times = []


class TestTileOpRank_ZeroDim2(TestTileOpRank1):
    def init_data(self):
        self.ori_shape = []
        self.repeat_times = [2]


class TestTileOpRank_ZeroDim3(TestTileOpRank1):
    def init_data(self):
        self.ori_shape = []
        self.repeat_times = [2, 3]


# repeat_times is a list (with tensor)
class TestTileOpRank1_tensor_attr(OpTest):
    def setUp(self):
        self.set_intel_hpu()
        self.place = paddle.CustomPlace("intel_hpu", int(intel_hpus_module_id))
        self.op_type = "tile"
        self.init_data()
        repeat_times_tensor = []
        for index, ele in enumerate(self.repeat_times):
            repeat_times_tensor.append(
                ("x" + str(index), np.ones((1)).astype("int32") * ele)
            )

        self.inputs = {
            "X": np.random.random(self.ori_shape).astype("float32"),
            "repeat_times_tensor": repeat_times_tensor,
        }
        self.attrs = {"repeat_times": self.infer_repeat_times}
        output = np.tile(self.inputs["X"], self.repeat_times)
        self.outputs = {"Out": output}

    def set_intel_hpu(self):
        self.__class__.use_custom_device = True
        self.__class__.no_need_check_grad = True

    def init_data(self):
        self.ori_shape = [100]
        self.repeat_times = [2]
        self.infer_repeat_times = [-1]

    def test_check_output(self):
        self.check_output_with_place(self.place)


class TestTileOpRank2_Corner_tensor_attr(TestTileOpRank1_tensor_attr):
    def init_data(self):
        self.ori_shape = [12, 14]
        self.repeat_times = [1, 1]
        self.infer_repeat_times = [1, -1]


class TestTileOpRank2_attr_tensor(TestTileOpRank1_tensor_attr):
    def init_data(self):
        self.ori_shape = [12, 14]
        self.repeat_times = [2, 3]
        self.infer_repeat_times = [-1, 3]


# repeat_times is a tensor
class TestTileOpRank1_tensor(OpTest):
    def setUp(self):
        self.set_intel_hpu()
        self.place = paddle.CustomPlace("intel_hpu", int(intel_hpus_module_id))
        self.op_type = "tile"
        self.init_data()

        self.inputs = {
            "X": np.random.random(self.ori_shape).astype("float32"),
            "RepeatTimes": np.array(self.repeat_times).astype("int32"),
        }
        self.attrs = {}
        output = np.tile(self.inputs["X"], self.repeat_times)
        self.outputs = {"Out": output}

    def set_intel_hpu(self):
        self.__class__.use_custom_device = True
        self.__class__.no_need_check_grad = True

    def init_data(self):
        self.ori_shape = [100]
        self.repeat_times = [2]

    def test_check_output(self):
        self.check_output_with_place(self.place)


class TestTileOpRank2_tensor(TestTileOpRank1_tensor):
    def init_data(self):
        self.ori_shape = [12, 14]
        self.repeat_times = [2, 3]


# input x is Integer
class TestTileOpInteger(OpTest):
    def setUp(self):
        self.set_intel_hpu()
        self.place = paddle.CustomPlace("intel_hpu", int(intel_hpus_module_id))
        self.op_type = "tile"
        self.inputs = {"X": np.random.randint(10, size=(4, 4, 5)).astype("int32")}
        self.attrs = {"repeat_times": [2, 1, 4]}
        output = np.tile(self.inputs["X"], (2, 1, 4))
        self.outputs = {"Out": output}

    def set_intel_hpu(self):
        self.__class__.use_custom_device = True
        self.__class__.no_need_check_grad = True

    def test_check_output(self):
        self.check_output_with_place(self.place)


# input x is int64
class TestTileOpInt64_t(OpTest):
    def setUp(self):
        self.set_intel_hpu()
        self.place = paddle.CustomPlace("intel_hpu", int(intel_hpus_module_id))
        self.op_type = "tile"
        self.inputs = {"X": np.random.randint(10, size=(2, 4, 5)).astype("int64")}
        self.attrs = {"repeat_times": [2, 1, 4]}
        output = np.tile(self.inputs["X"], (2, 1, 4))
        self.outputs = {"Out": output}

    def set_intel_hpu(self):
        self.__class__.use_custom_device = True
        self.__class__.no_need_check_grad = True

    def test_check_output(self):
        self.check_output_with_place(self.place)


# input x is Bool
class TestTileOpBool(OpTest):
    def setUp(self):
        self.set_intel_hpu()
        self.place = paddle.CustomPlace("intel_hpu", int(intel_hpus_module_id))
        self.op_type = "tile"
        self.inputs = {"X": np.random.randint(1, size=(2, 4, 5)).astype("bool")}
        self.attrs = {"repeat_times": [2, 1, 4]}
        output = np.tile(self.inputs["X"], (2, 1, 4))
        self.outputs = {"Out": output}

    def set_intel_hpu(self):
        self.__class__.use_custom_device = True
        self.__class__.no_need_check_grad = True

    def test_check_output(self):
        self.check_output_with_place(self.place)


# input x is FP16
class TestTileOpFloat16(OpTest):
    def setUp(self):
        self.set_intel_hpu()
        self.place = paddle.CustomPlace("intel_hpu", int(intel_hpus_module_id))
        self.op_type = "tile"
        self.inputs = {"X": np.random.randint(10, size=(2, 10, 5)).astype("float16")}
        self.attrs = {"repeat_times": [2, 1, 4]}
        output = np.tile(self.inputs["X"], (2, 1, 4))
        self.outputs = {"Out": output}

    def set_intel_hpu(self):
        self.__class__.use_custom_device = True
        self.__class__.no_need_check_grad = True

    def test_check_output(self):
        self.check_output_with_place(self.place)


if __name__ == "__main__":
    unittest.main()
```
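Since the file ends with unittest.main(), the suite runs directly under the Python interpreter; setting FLAGS_selected_intel_hpus in the environment selects which HPU module the tests target (defaulting to 0, as read at the top of the file).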
