Skip to content

Commit 1568c7a

Browse files
authored
[HuaweiAscendNPU] Support roll op (#9976)
1 parent ebcc2e6 commit 1568c7a

12 files changed

Lines changed: 298 additions & 30 deletions

File tree

docs/develop_guides/nnadapter.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1679,6 +1679,16 @@ typedef struct Device {
16791679
- 输出:
16801680
- 0 : output ,输出操作数,形状: [N, C, output_height, output_width] ,类型与输入操作数 `input` 相同。
16811681
1682+
- NNADAPTER_ROLL
1683+
1684+
沿给定维度滚动张量输入。超出最后位置的元素将在第一个位置重新引入。如果不设置 axes ,则张量将在滚动之前展开变平,然后恢复为原始形状。
1685+
- 输入 :
1686+
- 0 : input ,输入操作数,类型: NNADAPTER_FLOAT32 、 NNADAPTER_QUANT_INT8_SYMM_PER_LAYER 。
1687+
- 1 : shifts ,表示张量元素沿给定的单个或多个轴移动的位置数,形状:任意一维操作数,类型: NNADAPTER_INT32 。
1688+
- 2 : axes ,表示滚动的单个或多个轴,形状:任意一维操作数,类型: NNADAPTER_INT32 。
1689+
- 输出 :
1690+
- 0 : output ,输出操作数,与输入操作数 `input` 的形状和类型相同。
1691+
16821692
- NNADAPTER_SHAPE
16831693
16841694
获得输入的形状。

lite/backends/nnadapter/nnadapter/include/nnadapter/nnadapter.h

Lines changed: 50 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -978,15 +978,15 @@ typedef enum {
978978
* (a mini-batch of 2D inputs with additional channel dimension)
979979
* as described in the paper Group Normalization.
980980
*
981-
* Inputs:
981+
* Inputs:
982982
* * 0: input, a NNADAPTER_FLOAT32, NNADAPTER_QUANT_INT8_SYMM_PER_LAYER tensor
983983
* of shape [N, C, ...].
984984
* * 1: scale, a NNADAPTER_FLOAT32 tensor of shape [C].
985985
* * 2: bias, a NNADAPTER_FLOAT32 tensor of shape [C].
986986
* * 3: epsilon, a NNADAPTER_FLOAT32 tensor of shape [1], a small value added
987987
* to the variance to prevent division by zero, defaults to 1e-5.
988988
* * 4: groups, a NNADAPTER_INT32 tensor of shape [1], the number of groups
989-
* that divided from channels.
989+
* that divided from channels.
990990
*
991991
* Outputs:
992992
* * 0: output, a tensor of the same shape and type as `input`.
@@ -1630,19 +1630,19 @@ typedef enum {
16301630
NNADAPTER_REDUCE_SUM,
16311631

16321632
/**
1633-
* Performs element-wise rectified linear activation.
1634-
* The output is calculated using this formula:
1635-
* `output` = max(0, `input`)
1636-
*
1637-
* Inputs:
1638-
* * 0: input, a NNADAPTER_FLOAT32,
1639-
* NNADAPTER_QUANT_INT8_SYMM_PER_LAYER tensor.
1640-
*
1641-
* Outputs:
1642-
* * 0: output, a tensor of the same shape and type as `input`.
1643-
*
1644-
* Available since version 1.
1645-
*/
1633+
* Performs element-wise rectified linear activation.
1634+
* The output is calculated using this formula:
1635+
* `output` = max(0, `input`)
1636+
*
1637+
* Inputs:
1638+
* * 0: input, a NNADAPTER_FLOAT32,
1639+
* NNADAPTER_QUANT_INT8_SYMM_PER_LAYER tensor.
1640+
*
1641+
* Outputs:
1642+
* * 0: output, a tensor of the same shape and type as `input`.
1643+
*
1644+
* Available since version 1.
1645+
*/
16461646
NNADAPTER_RELU,
16471647

16481648
/**
@@ -1764,6 +1764,28 @@ typedef enum {
17641764
*/
17651765
NNADAPTER_ROI_ALIGN,
17661766

1767+
/**
1768+
* Roll the tensor input along the given dimension(s). Elements that are
1769+
* shifted beyond the last position are re-introduced at the first position.
1770+
* If axes is None, the tensor will be flattened before rolling and then
1771+
* restored to the original shape.
1772+
*
1773+
* Inputs:
1774+
* * 0: input, a NNADAPTER_FLOAT32, NNADAPTER_QUANT_INT8_SYMM_PER_LAYER
1775+
* tensor.
1776+
* * 1: shifts, a NNADAPTER_INT32 tensor. It
1777+
* represents the number of places by which the elements of the tensor are
1778+
* shifted.
1779+
* * 2: axes, a NNADAPTER_INT32 tensor. It
1780+
* represents the axis or axes along which to roll.
1781+
*
1782+
* Outputs:
1783+
* * 0: output, a tensor of the same shape and type as `input`.
1784+
*
1785+
* Available since version 1.
1786+
*/
1787+
NNADAPTER_ROLL,
1788+
17671789
/**
17681790
* Outputs an 1-D tensor containing the shape of the input tensor.
17691791
*
@@ -1909,19 +1931,19 @@ typedef enum {
19091931
NNADAPTER_SPLIT,
19101932

19111933
/**
1912-
* Performs element-wise square operation.
1913-
* The output is calculated using this formula:
1914-
* `output` = `input`^2
1915-
*
1916-
* Inputs:
1917-
* * 0: input, a NNADAPTER_FLOAT32,
1918-
* NNADAPTER_QUANT_INT8_SYMM_PER_LAYER tensor.
1919-
*
1920-
* Outputs:
1921-
* * 0: output, a tensor of the same shape and type as `input`.
1922-
*
1923-
* Available since version 1.
1924-
*/
1934+
* Performs element-wise square operation.
1935+
* The output is calculated using this formula:
1936+
* `output` = `input`^2
1937+
*
1938+
* Inputs:
1939+
* * 0: input, a NNADAPTER_FLOAT32,
1940+
* NNADAPTER_QUANT_INT8_SYMM_PER_LAYER tensor.
1941+
*
1942+
* Outputs:
1943+
* * 0: output, a tensor of the same shape and type as `input`.
1944+
*
1945+
* Available since version 1.
1946+
*/
19251947
NNADAPTER_SQUARE,
19261948

19271949
/**
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#pragma once
16+
17+
#include <vector>
18+
19+
namespace nnadapter {
20+
namespace operation {
21+
22+
#define ROLL_OPERATION_EXTRACT_INPUTS_OUTPUTS \
23+
auto& input_operands = operation->input_operands; \
24+
auto& output_operands = operation->output_operands; \
25+
auto input_count = input_operands.size(); \
26+
auto output_count = output_operands.size(); \
27+
NNADAPTER_CHECK_EQ(input_count, 3); \
28+
NNADAPTER_CHECK_EQ(output_count, 1); \
29+
/* Input */ \
30+
auto input_operand = input_operands[0]; \
31+
NNADAPTER_VLOG(5) << "input: " << OperandToString(input_operand); \
32+
/* Shifts */ \
33+
auto shifts_operand = input_operands[1]; \
34+
std::vector<int32_t> shifts; \
35+
if (IsConstantOperand(shifts_operand)) { \
36+
auto shifts_count = shifts_operand->length / sizeof(int32_t); \
37+
auto shifts_data = reinterpret_cast<int32_t*>(shifts_operand->buffer); \
38+
shifts = std::vector<int32_t>(shifts_data, shifts_data + shifts_count); \
39+
for (size_t i = 0; i < shifts.size(); i++) { \
40+
NNADAPTER_VLOG(5) << "shifts[" << i << "]: " << shifts[i]; \
41+
} \
42+
} else { \
43+
NNADAPTER_VLOG(5) << "shifts: " << OperandToString(shifts_operand); \
44+
} \
45+
/* Axes */ \
46+
auto axes_operand = input_operands[2]; \
47+
std::vector<int32_t> axes; \
48+
if (IsConstantOperand(axes_operand)) { \
49+
auto axes_count = axes_operand->length / sizeof(int32_t); \
50+
auto axes_data = reinterpret_cast<int32_t*>(axes_operand->buffer); \
51+
axes = std::vector<int32_t>(axes_data, axes_data + axes_count); \
52+
for (size_t i = 0; i < axes.size(); i++) { \
53+
NNADAPTER_VLOG(5) << "axes[" << i << "]: " << axes[i]; \
54+
} \
55+
} else { \
56+
NNADAPTER_VLOG(5) << "axes: " << OperandToString(axes_operand); \
57+
} \
58+
/* Output */ \
59+
auto output_operand = output_operands[0]; \
60+
NNADAPTER_VLOG(5) << "output: " << OperandToString(output_operand);
61+
62+
} // namespace operation
63+
} // namespace nnadapter

lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/converter/all.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ REGISTER_CONVERTER(RESHAPE, ConvertReshape)
8383
REGISTER_CONVERTER(RESIZE_LINEAR, ConvertResizeLinear)
8484
REGISTER_CONVERTER(RESIZE_NEAREST, ConvertResizeNearest)
8585
REGISTER_CONVERTER(ROI_ALIGN, ConvertRoiAlign)
86+
REGISTER_CONVERTER(ROLL, ConvertRoll)
8687
REGISTER_CONVERTER(SHAPE, ConvertShape)
8788
REGISTER_CONVERTER(SIGMOID, ConvertUnaryActivations)
8889
REGISTER_CONVERTER(SIN, ConvertUnaryActivations)
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "operation/roll.h"
16+
#include "driver/huawei_ascend_npu/converter/converter.h"
17+
#include "utility/debug.h"
18+
#include "utility/logging.h"
19+
#include "utility/modeling.h"
20+
21+
namespace nnadapter {
22+
namespace huawei_ascend_npu {
23+
24+
int ConvertRoll(Converter* converter, core::Operation* operation) {
25+
ROLL_OPERATION_EXTRACT_INPUTS_OUTPUTS
26+
27+
// Convert to GE operators
28+
auto input_operator = converter->GetMappedOperator(input_operand);
29+
if (!input_operator) {
30+
input_operator = converter->ConvertOperand(input_operand);
31+
}
32+
auto shifts_operator = converter->GetMappedOperator(shifts_operand);
33+
if (!shifts_operator) {
34+
shifts_operator = converter->ConvertOperand(shifts_operand);
35+
}
36+
auto axes_operator = converter->GetMappedOperator(axes_operand);
37+
if (!axes_operator) {
38+
axes_operator = converter->ConvertOperand(axes_operand);
39+
}
40+
auto roll_op = converter->AddOperator<ge::op::RollV2>(output_operand);
41+
SET_INPUT(roll_op, input, input_operator);
42+
SET_INPUT(roll_op, shift, shifts_operator);
43+
SET_INPUT(roll_op, axes, axes_operator);
44+
MAP_OUTPUT(roll_op, output, output_operand);
45+
return NNADAPTER_NO_ERROR;
46+
}
47+
48+
} // namespace huawei_ascend_npu
49+
} // namespace nnadapter

lite/backends/nnadapter/nnadapter/src/operation/all.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,7 @@ REGISTER_OPERATION(ROI_ALIGN,
219219
ValidateRoiAlign,
220220
PrepareRoiAlign,
221221
ExecuteRoiAlign)
222+
REGISTER_OPERATION(ROLL, ValidateRoll, PrepareRoll, ExecuteRoll)
222223
REGISTER_OPERATION(SHAPE, ValidateShape, PrepareShape, ExecuteShape)
223224
REGISTER_OPERATION(SIGMOID,
224225
ValidateUnaryActivations,
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "operation/roll.h"
16+
#include "core/types.h"
17+
#include "utility/debug.h"
18+
#include "utility/logging.h"
19+
#include "utility/micros.h"
20+
#include "utility/modeling.h"
21+
#include "utility/utility.h"
22+
23+
namespace nnadapter {
24+
namespace operation {
25+
26+
NNADAPTER_EXPORT bool ValidateRoll(const core::Operation* operation) {
27+
return false;
28+
}
29+
30+
NNADAPTER_EXPORT int PrepareRoll(core::Operation* operation) {
31+
ROLL_OPERATION_EXTRACT_INPUTS_OUTPUTS
32+
33+
// Infer the shape and type of output operands
34+
CopyOperandTypeExceptQuantParams(&output_operand->type, input_operand->type);
35+
NNADAPTER_VLOG(5) << "output: " << OperandToString(output_operand);
36+
return NNADAPTER_NO_ERROR;
37+
}
38+
39+
NNADAPTER_EXPORT int ExecuteRoll(core::Operation* operation) {
40+
return NNADAPTER_FEATURE_NOT_SUPPORTED;
41+
}
42+
43+
} // namespace operation
44+
} // namespace nnadapter

lite/backends/nnadapter/nnadapter/src/utility/debug.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -563,6 +563,10 @@ NNADAPTER_EXPORT std::string Visualize(core::Model* model) {
563563
"score_threshold"};
564564
output_args = {"Out", "NmsRoisNum", "Index"};
565565
break;
566+
case NNADAPTER_ROLL:
567+
input_args = {"input", "shifts", "axes"};
568+
output_args = {"output"};
569+
break;
566570
default:
567571
if (operation->type < 0) {
568572
input_args.resize(input_count);
@@ -753,6 +757,7 @@ NNADAPTER_EXPORT std::string OperationTypeToString(
753757
NNADAPTER_TYPE_TO_STRING(RESIZE_NEAREST);
754758
NNADAPTER_TYPE_TO_STRING(RESIZE_LINEAR);
755759
NNADAPTER_TYPE_TO_STRING(ROI_ALIGN);
760+
NNADAPTER_TYPE_TO_STRING(ROLL);
756761
NNADAPTER_TYPE_TO_STRING(SHAPE);
757762
NNADAPTER_TYPE_TO_STRING(SIGMOID);
758763
NNADAPTER_TYPE_TO_STRING(SIN);

lite/kernels/nnadapter/converter/all.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -544,6 +544,7 @@ REGISTER_CONVERTER(sin,
544544
"huawei_ascend_npu,verisilicon_timvx");
545545
REGISTER_CONVERTER(cos, ConvertUnaryActivations, "huawei_ascend_npu");
546546
REGISTER_CONVERTER(silu, ConvertUnaryActivations, "huawei_ascend_npu");
547+
REGISTER_CONVERTER(roll, ConvertRoll, "huawei_ascend_npu");
547548
REGISTER_CONVERTER(elementwise_floordiv,
548549
ConvertElementwise,
549550
"huawei_ascend_npu,verisilicon_timvx");
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "lite/kernels/nnadapter/converter/converter.h"
16+
17+
namespace paddle {
18+
namespace lite {
19+
namespace kernels {
20+
namespace nnadapter {
21+
22+
int ConvertRoll(Converter* converter, OpInfo* op, Scope* scope) {
23+
// Input operand
24+
auto x_name = op->Input("X").front();
25+
auto x_scale_name = "X0_scale";
26+
std::vector<float> x_scales;
27+
if (op->HasInputScale(x_scale_name, true)) {
28+
x_scales = op->GetInputScale(x_scale_name, true);
29+
}
30+
auto input_operand = converter->AddInputOperand(scope, x_name, {}, x_scales);
31+
// Shifts
32+
NNAdapterOperand* shifts_operand = nullptr;
33+
if (HasInput(op, scope, "ShiftsTensor")) {
34+
auto shifts_name = op->Input("ShiftsTensor").front();
35+
shifts_operand = converter->AddInputOperand(scope, shifts_name);
36+
} else {
37+
std::vector<int64_t> shifts = op->GetAttr<std::vector<int64_t>>("shifts");
38+
shifts_operand = converter->AddConstantOperand(
39+
std::vector<int32_t>(shifts.begin(), shifts.end()));
40+
}
41+
// Axes
42+
std::vector<int64_t> axis = op->GetAttr<std::vector<int64_t>>("axis");
43+
auto axes_operand = converter->AddConstantOperand(
44+
std::vector<int32_t>(axis.begin(), axis.end()));
45+
// Output
46+
auto out_name = op->Output("Out").front();
47+
auto out_scale_name = "Out0_scale";
48+
std::vector<float> out_scales;
49+
if (op->HasOutputScale(out_scale_name, true)) {
50+
out_scales = op->GetOutputScale(out_scale_name, true);
51+
}
52+
auto output_operand = converter->AddOutputOperand(out_name, out_scales);
53+
// Roll operation
54+
converter->AddOperation(NNADAPTER_ROLL,
55+
{input_operand, shifts_operand, axes_operand},
56+
{output_operand});
57+
return NO_ERROR;
58+
}
59+
60+
} // namespace nnadapter
61+
} // namespace kernels
62+
} // namespace lite
63+
} // namespace paddle

0 commit comments

Comments
 (0)