Skip to content

Commit c49d60e

Browse files
author
wangbingnan03
committed
Debug: extend slice op to support TensorArray inputs.
1 parent dfdd2e8 commit c49d60e

7 files changed

Lines changed: 490 additions & 59 deletions

File tree

lite/kernels/x86/scale_compute.h

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,25 @@ namespace kernels {
2626
namespace x86 {
2727

2828
template <typename T>
29-
void scale_compute(
29+
static void scale_compute(
3030
const T* x, T* out, int size, T scale, T bias, bool bias_before) {
3131
if (bias_before) bias *= scale;
3232
for (int i = 0; i < size; i++) {
3333
out[i] = x[i] * scale + bias;
3434
}
3535
}
3636

37+
template <typename T>
38+
static void print_tensor(lite::Tensor* Tensor) {
39+
std::cout << "*********tensor********" << std::endl;
40+
T* data = Tensor->mutable_data<T>();
41+
std::cout << "Tensor len: " << Tensor->data_size() << std::endl;
42+
for (int j = 0; j < Tensor->data_size() && j < 10; j++) {
43+
std::cout << data[j] << " ";
44+
}
45+
std::cout << std::endl;
46+
}
47+
3748
template <typename T>
3849
class ScaleCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
3950
public:
@@ -49,6 +60,13 @@ class ScaleCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
4960
scale,
5061
bias,
5162
!param.bias_after_scale);
63+
std::cout << "********scale********" << std::endl;
64+
std::cout << "Input: " << std::endl;
65+
print_tensor<T>(param.x);
66+
std::cout << "scale: " << scale << std::endl;
67+
std::cout << "bias: " << bias << std::endl;
68+
std::cout << "Output: " << std::endl;
69+
print_tensor<T>(param.output);
5270
}
5371

5472
virtual ~ScaleCompute() = default;

lite/kernels/x86/slice_compute.cc

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,25 @@ REGISTER_LITE_KERNEL(slice,
3333
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kFloat))})
3434
.Finalize();
3535

36+
// x86 slice kernel variant whose "Input" is a tensor array
// (std::vector<Tensor>, float) instead of a single tensor; alias "array_def".
REGISTER_LITE_KERNEL(slice,
                     kX86,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::x86::SliceCompute<float>,
                     array_def)
    .BindInput("Input",
               {LiteType::GetTensorListTy(TARGET(kX86), PRECISION(kFloat))})
    .BindInput("StartsTensor",
               {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kAny))})
    .BindInput("EndsTensor",
               {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kAny))})
    .BindInput("StartsTensorList",
               {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kAny))})
    .BindInput("EndsTensorList",
               {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kAny))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kFloat))})
    .Finalize();
54+
3655
REGISTER_LITE_KERNEL(slice,
3756
kX86,
3857
kFloat,
@@ -52,6 +71,25 @@ REGISTER_LITE_KERNEL(slice,
5271
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt32))})
5372
.Finalize();
5473

74+
// x86 slice kernel variant for tensor-array input with int32 elements;
// alias "array_int32".
REGISTER_LITE_KERNEL(slice,
                     kX86,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::x86::SliceCompute<int>,
                     array_int32)
    .BindInput("Input",
               {LiteType::GetTensorListTy(TARGET(kX86), PRECISION(kInt32))})
    .BindInput("StartsTensor",
               {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kAny))})
    .BindInput("EndsTensor",
               {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kAny))})
    .BindInput("StartsTensorList",
               {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kAny))})
    .BindInput("EndsTensorList",
               {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kAny))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt32))})
    .Finalize();
92+
5593
REGISTER_LITE_KERNEL(slice,
5694
kX86,
5795
kFloat,
@@ -70,3 +108,22 @@ REGISTER_LITE_KERNEL(slice,
70108
{LiteType::GetTensorTy(TARGET(kX86), PRECISION(kAny))})
71109
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt64))})
72110
.Finalize();
111+
112+
// x86 slice kernel variant for tensor-array input with int64 elements;
// alias "array_int64".
REGISTER_LITE_KERNEL(slice,
                     kX86,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::x86::SliceCompute<int64_t>,
                     array_int64)
    .BindInput("Input",
               {LiteType::GetTensorListTy(TARGET(kX86), PRECISION(kInt64))})
    .BindInput("StartsTensor",
               {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kAny))})
    .BindInput("EndsTensor",
               {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kAny))})
    .BindInput("StartsTensorList",
               {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kAny))})
    .BindInput("EndsTensorList",
               {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kAny))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt64))})
    .Finalize();

lite/kernels/x86/slice_compute.h

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,88 @@ namespace lite {
2828
namespace kernels {
2929
namespace x86 {
3030

31+
// Debug helper: prints each tensor in a tensor array (its length plus the
// first up-to-10 elements). Assumes the elements are float — TODO confirm
// for the int32/int64 kernel variants. Temporary debugging aid.
static void print_tensor_array(const std::vector<lite::Tensor>* XTensorList) {
  std::cout << "*********tensor array********" << std::endl;
  for (size_t i = 0; i < XTensorList->size(); i++) {
    // Const reference: the original copied the whole tensor by value here.
    const auto& tensor = XTensorList->at(i);
    const size_t len = tensor.data_size();
    std::cout << "Tensor " << i << " len: " << len << std::endl;
    // Hoist the data pointer out of the loop; read-only access suffices.
    const float* data = tensor.data<float>();
    for (size_t j = 0; j < len && j < 10; j++) {
      std::cout << data[j] << " ";
    }
    std::cout << std::endl;
  }
}
42+
43+
static void print_tensor(lite::Tensor* Tensor) {
44+
std::cout << "*********tensor********" << std::endl;
45+
float* data = Tensor->mutable_data<float>();
46+
std::cout << "Tensor len: " << Tensor->data_size() << std::endl;
47+
for (int j = 0; j < Tensor->data_size() && j < 10; j++) {
48+
std::cout << data[j] << " ";
49+
}
50+
std::cout << std::endl;
51+
}
52+
53+
// Slices an input tensor array over [start, end) along the array dimension.
// When |out_is_array| is true the selected tensors are copied into
// |OutTensorList|; otherwise the single tensor at |start| is copied into
// |Out|. Negative starts/ends are interpreted Python-style (offset from the
// array size).
// Marked `static`: a non-inline function definition in a header would
// violate the ODR once this header is included from more than one
// translation unit (the sibling print helpers here are `static` too).
static void DealTensorArray(const std::vector<lite::Tensor>* XTensorList,
                            std::vector<lite::Tensor>* OutTensorList,
                            lite::Tensor* Out,
                            const std::vector<int>& starts,
                            const std::vector<int>& ends,
                            bool out_is_array) {
  // Guard before reading starts[0]/ends[0].
  CHECK(!starts.empty()) << "starts must not be empty for tensor-array slice";
  CHECK(!ends.empty()) << "ends must not be empty for tensor-array slice";
  auto in_array = XTensorList;
  // If the input is LoDTensorArray, the rank of input is 1.
  const int64_t in_size = static_cast<int64_t>(in_array->size());
  int64_t start = starts[0] < 0 ? (starts[0] + in_size) : starts[0];
  int64_t end = ends[0] < 0 ? (ends[0] + in_size) : ends[0];

  // Clamp the slice bounds into [0, in_size].
  start = std::max(start, static_cast<int64_t>(0));
  end = std::max(end, static_cast<int64_t>(0));
  end = std::min(end, in_size);

  CHECK_GT(end, start) << "end should greater than start";
  const int64_t out_size = end - start;

  // TODO: temporary debug output — remove before release.
  std::cout << "starts: " << std::endl;
  for (size_t i = 0; i < starts.size(); i++) {
    std::cout << starts[i] << " ";
  }
  std::cout << std::endl;
  std::cout << "ends: " << std::endl;
  for (size_t i = 0; i < ends.size(); i++) {
    std::cout << ends[i] << " ";
  }
  std::cout << std::endl;

  if (out_is_array) {
    auto out_array = OutTensorList;
    out_array->resize(out_size);
    for (int64_t i = 0; i < out_size; ++i) {
      auto* out_tensor = &out_array->at(i);
      // Const reference: the original copied the whole tensor by value on
      // every iteration before CopyDataFrom.
      const auto& in_tensor = in_array->at(i + start);
      out_tensor->set_lod(in_tensor.lod());
      if (in_tensor.memory_size() > 0) {
        out_tensor->CopyDataFrom(in_tensor);
      } else {
        VLOG(4) << "WARNING: The input tensor 'x_tensor' holds no memory, so "
                   "nothing has been written to output array["
                << i << "].";
      }
    }
  } else {
    auto out_tensor = Out;
    const auto& in_tensor = in_array->at(start);
    out_tensor->CopyDataFrom(in_tensor);
  }
  // TODO: temporary debug output — remove before release.
  std::cout << "input array:" << std::endl;
  print_tensor_array(XTensorList);
  if (out_is_array) {
    std::cout << "out array:" << std::endl;
    print_tensor_array(OutTensorList);
  } else {
    print_tensor(Out);
  }
}
112+
31113
inline std::vector<int> GetIntDataFromTensorList(
32114
const std::vector<lite::Tensor*>& list_tensor) {
33115
std::vector<int> vec_data;
@@ -219,6 +301,8 @@ void slice_compute(const lite::Tensor* in,
219301
template <class T>
220302
void slice_compute_(const lite::Tensor* Input,
221303
lite::Tensor* Out,
304+
const std::vector<lite::Tensor>* XTensorList,
305+
std::vector<lite::Tensor>* OutTensorList,
222306
std::vector<int> axes,
223307
std::vector<int> starts,
224308
std::vector<int> ends,
@@ -228,6 +312,38 @@ void slice_compute_(const lite::Tensor* Input,
228312
std::vector<lite::Tensor*> StartsTensorList,
229313
std::vector<lite::Tensor*> EndsTensorList,
230314
std::vector<int> infer_flags) {
315+
if (Input == nullptr && XTensorList != nullptr) {
316+
bool need_infer = false;
317+
if (StartsTensor || EndsTensor) {
318+
need_infer = true;
319+
}
320+
if (StartsTensorList.size() > 0 || EndsTensorList.size() > 0) {
321+
need_infer = true;
322+
}
323+
if (need_infer) {
324+
if (StartsTensor) {
325+
starts = GetIntDataFromTensor(StartsTensor);
326+
} else if (StartsTensorList.size() > 0) {
327+
starts = GetIntDataFromTensorList(StartsTensorList);
328+
}
329+
CHECK_EQ(starts.size(), axes.size())
330+
<< "The size of starts must be equal to the size of axes.";
331+
if (EndsTensor) {
332+
ends = GetIntDataFromTensor(EndsTensor);
333+
} else if (EndsTensorList.size() > 0) {
334+
ends = GetIntDataFromTensorList(EndsTensorList);
335+
}
336+
CHECK_EQ(ends.size(), axes.size())
337+
<< "The size of starts must be equal to the size of axes.";
338+
}
339+
DealTensorArray(XTensorList,
340+
OutTensorList,
341+
Out,
342+
starts,
343+
ends,
344+
(Out == nullptr && OutTensorList != nullptr));
345+
return;
346+
}
231347
int rank = Input->dims().size();
232348
switch (rank) {
233349
case 1:
@@ -320,6 +436,8 @@ class SliceCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
320436
auto& param = *param_.get_mutable<param_t>();
321437
slice_compute_<T>(param.X,
322438
param.Out,
439+
param.XTensorList,
440+
param.OutTensorList,
323441
param.axes,
324442
param.starts,
325443
param.ends,

lite/kernels/xpu/scale_compute.cc

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,23 @@ namespace lite {
2121
namespace kernels {
2222
namespace xpu {
2323

24+
template <typename T>
25+
static void print_tensor(lite::Tensor* Tensor) {
26+
std::cout << "*********tensor********" << std::endl;
27+
lite::Tensor tmp;
28+
tmp.Resize(Tensor->dims());
29+
TargetWrapperXPU::MemcpySync(tmp.mutable_data<T>(),
30+
Tensor->raw_data(),
31+
Tensor->memory_size(),
32+
IoDirection::DtoH);
33+
T* data = tmp.mutable_data<T>();
34+
std::cout << "Tensor len: " << tmp.data_size() << std::endl;
35+
for (int j = 0; j < tmp.data_size() && j < 10; j++) {
36+
std::cout << data[j] << " ";
37+
}
38+
std::cout << std::endl;
39+
}
40+
2441
template <typename T>
2542
void ScaleCompute<T>::Run() {
2643
auto& param = this->template Param<param_t>();
@@ -45,6 +62,13 @@ void ScaleCompute<T>::Run() {
4562
if (!param.x->lod().empty()) {
4663
param.output->set_lod(param.x->lod());
4764
}
65+
std::cout << "********scale********" << std::endl;
66+
std::cout << "Input: " << std::endl;
67+
print_tensor<T>(param.x);
68+
std::cout << "scale: " << param.scale << std::endl;
69+
std::cout << "bias: " << param.bias << std::endl;
70+
std::cout << "Output: " << std::endl;
71+
print_tensor<T>(param.output);
4872
}
4973

5074
} // namespace xpu
@@ -71,3 +95,13 @@ REGISTER_LITE_KERNEL(scale,
7195
.BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt32))})
7296
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt32))})
7397
.Finalize();
98+
99+
// XPU scale kernel variant for int64 tensors; alias "int64" (parallel to the
// int32 registration above).
REGISTER_LITE_KERNEL(scale,
                     kXPU,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::xpu::ScaleCompute<int64_t>,
                     int64)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt64))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU), PRECISION(kInt64))})
    .Finalize();

0 commit comments

Comments
 (0)