Skip to content

Commit 1568c7a

Browse files
authored
[HuaweiAscendNPU] Support roll op (#9976)
1 parent ebcc2e6 commit 1568c7a

12 files changed

Lines changed: 298 additions & 30 deletions

File tree

docs/develop_guides/nnadapter.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1679,6 +1679,16 @@ typedef struct Device {
16791679
- 输出:
16801680
- 0 : output ,输出操作数,形状: [N, C, output_height, output_width] ,类型与输入操作数 `input` 相同。
16811681
1682+
- NNADAPTER_ROLL
1683+
1684+
沿给定维度滚动张量输入。超出最后位置的元素将在第一个位置重新引入。如果不设置 axes ,则张量将在滚动之前展开变平,然后恢复为原始形状。
1685+
- 输入 :
1686+
- 0 : input ,输入操作数,类型: NNADAPTER_FLOAT32 、 NNADAPTER_QUANT_INT8_SYMM_PER_LAYER 。
1687+
- 1 : shifts ,表示张量元素沿给定的单个或多个轴移动的位置数,形状:任意一维操作数,类型: NNADAPTER_INT32 。
1688+
- 2 : axes ,表示滚动的单个或多个轴,形状:任意一维操作数,类型: NNADAPTER_INT32 。
1689+
- 输出 :
1690+
- 0 : output ,输出操作数,与输入操作数 `input` 的形状和类型相同。
1691+
16821692
- NNADAPTER_SHAPE
16831693
16841694
获得输入的形状。

lite/backends/nnadapter/nnadapter/include/nnadapter/nnadapter.h

Lines changed: 50 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -978,15 +978,15 @@ typedef enum {
978978
* (a mini-batch of 2D inputs with additional channel dimension)
979979
* as described in the paper Group Normalization.
980980
*
981-
* Inputs:
981+
* Inputs:
982982
* * 0: input, a NNADAPTER_FLOAT32, NNADAPTER_QUANT_INT8_SYMM_PER_LAYER tensor
983983
* of shape [N, C, ...].
984984
* * 1: scale, a NNADAPTER_FLOAT32 tensor of shape [C].
985985
* * 2: bias, a NNADAPTER_FLOAT32 tensor of shape [C].
986986
* * 3: epsilon, a NNADAPTER_FLOAT32 tensor of shape [1], a small value added
987987
* to the variance to prevent division by zero, defaults to 1e-5.
988988
* * 4: groups, a NNADAPTER_INT32 tensor of shape [1], the number of groups
989-
* that divided from channels.
989+
* that divided from channels.
990990
*
991991
* Outputs:
992992
* * 0: output, a tensor of the same shape and type as `input`.
@@ -1630,19 +1630,19 @@ typedef enum {
16301630
NNADAPTER_REDUCE_SUM,
16311631

16321632
/**
1633-
* Performs element-wise rectified linear activation.
1634-
* The output is calculated using this formula:
1635-
* `output` = max(0, `input`)
1636-
*
1637-
* Inputs:
1638-
* * 0: input, a NNADAPTER_FLOAT32,
1639-
* NNADAPTER_QUANT_INT8_SYMM_PER_LAYER tensor.
1640-
*
1641-
* Outputs:
1642-
* * 0: output, a tensor of the same shape and type as `input`.
1643-
*
1644-
* Available since version 1.
1645-
*/
1633+
* Performs element-wise rectified linear activation.
1634+
* The output is calculated using this formula:
1635+
* `output` = max(0, `input`)
1636+
*
1637+
* Inputs:
1638+
* * 0: input, a NNADAPTER_FLOAT32,
1639+
* NNADAPTER_QUANT_INT8_SYMM_PER_LAYER tensor.
1640+
*
1641+
* Outputs:
1642+
* * 0: output, a tensor of the same shape and type as `input`.
1643+
*
1644+
* Available since version 1.
1645+
*/
16461646
NNADAPTER_RELU,
16471647

16481648
/**
@@ -1764,6 +1764,28 @@ typedef enum {
17641764
*/
17651765
NNADAPTER_ROI_ALIGN,
17661766

1767+
/**
1768+
* Roll the tensor input along the given dimension(s). Elements that are
1769+
* shifted beyond the last position are re-introduced at the first position.
1770+
* If axes is None, the tensor will be flattened before rolling and then
1771+
* restored to the original shape.
1772+
*
1773+
* Inputs:
1774+
* * 0: input, a NNADAPTER_FLOAT32, NNADAPTER_QUANT_INT8_SYMM_PER_LAYER
1775+
* tensor.
1776+
* * 1: shifts, a NNADAPTER_INT32 tensor. It
1777+
* represents the number of places by which the elements of the tensor are
1778+
* shifted.
1779+
* * 2: axes, a NNADAPTER_INT32 tensor. It
1780+
* represents the axis or axes along which to roll.
1781+
*
1782+
* Outputs:
1783+
* * 0: output, a tensor of the same shape and type as `input`.
1784+
*
1785+
* Available since version 1.
1786+
*/
1787+
NNADAPTER_ROLL,
1788+
17671789
/**
17681790
* Outputs an 1-D tensor containing the shape of the input tensor.
17691791
*
@@ -1909,19 +1931,19 @@ typedef enum {
19091931
NNADAPTER_SPLIT,
19101932

19111933
/**
1912-
* Performs element-wise square operation.
1913-
* The output is calculated using this formula:
1914-
* `output` = `input`^2
1915-
*
1916-
* Inputs:
1917-
* * 0: input, a NNADAPTER_FLOAT32,
1918-
* NNADAPTER_QUANT_INT8_SYMM_PER_LAYER tensor.
1919-
*
1920-
* Outputs:
1921-
* * 0: output, a tensor of the same shape and type as `input`.
1922-
*
1923-
* Available since version 1.
1924-
*/
1934+
* Performs element-wise square operation.
1935+
* The output is calculated using this formula:
1936+
* `output` = `input`^2
1937+
*
1938+
* Inputs:
1939+
* * 0: input, a NNADAPTER_FLOAT32,
1940+
* NNADAPTER_QUANT_INT8_SYMM_PER_LAYER tensor.
1941+
*
1942+
* Outputs:
1943+
* * 0: output, a tensor of the same shape and type as `input`.
1944+
*
1945+
* Available since version 1.
1946+
*/
19251947
NNADAPTER_SQUARE,
19261948

19271949
/**
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#pragma once
16+
17+
#include <vector>
18+
19+
namespace nnadapter {
20+
namespace operation {
21+
22+
#define ROLL_OPERATION_EXTRACT_INPUTS_OUTPUTS \
23+
auto& input_operands = operation->input_operands; \
24+
auto& output_operands = operation->output_operands; \
25+
auto input_count = input_operands.size(); \
26+
auto output_count = output_operands.size(); \
27+
NNADAPTER_CHECK_EQ(input_count, 3); \
28+
NNADAPTER_CHECK_EQ(output_count, 1); \
29+
/* Input */ \
30+
auto input_operand = input_operands[0]; \
31+
NNADAPTER_VLOG(5) << "input: " << OperandToString(input_operand); \
32+
/* Shifts */ \
33+
auto shifts_operand = input_operands[1]; \
34+
std::vector<int32_t> shifts; \
35+
if (IsConstantOperand(shifts_operand)) { \
36+
auto shifts_count = shifts_operand->length / sizeof(int32_t); \
37+
auto shifts_data = reinterpret_cast<int32_t*>(shifts_operand->buffer); \
38+
shifts = std::vector<int32_t>(shifts_data, shifts_data + shifts_count); \
39+
for (size_t i = 0; i < shifts.size(); i++) { \
40+
NNADAPTER_VLOG(5) << "shifts[" << i << "]: " << shifts[i]; \
41+
} \
42+
} else { \
43+
NNADAPTER_VLOG(5) << "shifts: " << OperandToString(shifts_operand); \
44+
} \
45+
/* Axes */ \
46+
auto axes_operand = input_operands[2]; \
47+
std::vector<int32_t> axes; \
48+
if (IsConstantOperand(axes_operand)) { \
49+
auto axes_count = axes_operand->length / sizeof(int32_t); \
50+
auto axes_data = reinterpret_cast<int32_t*>(axes_operand->buffer); \
51+
axes = std::vector<int32_t>(axes_data, axes_data + axes_count); \
52+
for (size_t i = 0; i < axes.size(); i++) { \
53+
NNADAPTER_VLOG(5) << "axes[" << i << "]: " << axes[i]; \
54+
} \
55+
} else { \
56+
NNADAPTER_VLOG(5) << "axes: " << OperandToString(axes_operand); \
57+
} \
58+
/* Output */ \
59+
auto output_operand = output_operands[0]; \
60+
NNADAPTER_VLOG(5) << "output: " << OperandToString(output_operand);
61+
62+
} // namespace operation
63+
} // namespace nnadapter

lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/converter/all.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ REGISTER_CONVERTER(RESHAPE, ConvertReshape)
8383
REGISTER_CONVERTER(RESIZE_LINEAR, ConvertResizeLinear)
8484
REGISTER_CONVERTER(RESIZE_NEAREST, ConvertResizeNearest)
8585
REGISTER_CONVERTER(ROI_ALIGN, ConvertRoiAlign)
86+
REGISTER_CONVERTER(ROLL, ConvertRoll)
8687
REGISTER_CONVERTER(SHAPE, ConvertShape)
8788
REGISTER_CONVERTER(SIGMOID, ConvertUnaryActivations)
8889
REGISTER_CONVERTER(SIN, ConvertUnaryActivations)
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "operation/roll.h"
16+
#include "driver/huawei_ascend_npu/converter/converter.h"
17+
#include "utility/debug.h"
18+
#include "utility/logging.h"
19+
#include "utility/modeling.h"
20+
21+
namespace nnadapter {
22+
namespace huawei_ascend_npu {
23+
24+
int ConvertRoll(Converter* converter, core::Operation* operation) {
25+
ROLL_OPERATION_EXTRACT_INPUTS_OUTPUTS
26+
27+
// Convert to GE operators
28+
auto input_operator = converter->GetMappedOperator(input_operand);
29+
if (!input_operator) {
30+
input_operator = converter->ConvertOperand(input_operand);
31+
}
32+
auto shifts_operator = converter->GetMappedOperator(shifts_operand);
33+
if (!shifts_operator) {
34+
shifts_operator = converter->ConvertOperand(shifts_operand);
35+
}
36+
auto axes_operator = converter->GetMappedOperator(axes_operand);
37+
if (!axes_operator) {
38+
axes_operator = converter->ConvertOperand(axes_operand);
39+
}
40+
auto roll_op = converter->AddOperator<ge::op::RollV2>(output_operand);
41+
SET_INPUT(roll_op, input, input_operator);
42+
SET_INPUT(roll_op, shift, shifts_operator);
43+
SET_INPUT(roll_op, axes, axes_operator);
44+
MAP_OUTPUT(roll_op, output, output_operand);
45+
return NNADAPTER_NO_ERROR;
46+
}
47+
48+
} // namespace huawei_ascend_npu
49+
} // namespace nnadapter

lite/backends/nnadapter/nnadapter/src/operation/all.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,7 @@ REGISTER_OPERATION(ROI_ALIGN,
219219
ValidateRoiAlign,
220220
PrepareRoiAlign,
221221
ExecuteRoiAlign)
222+
REGISTER_OPERATION(ROLL, ValidateRoll, PrepareRoll, ExecuteRoll)
222223
REGISTER_OPERATION(SHAPE, ValidateShape, PrepareShape, ExecuteShape)
223224
REGISTER_OPERATION(SIGMOID,
224225
ValidateUnaryActivations,
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "operation/roll.h"
16+
#include "core/types.h"
17+
#include "utility/debug.h"
18+
#include "utility/logging.h"
19+
#include "utility/micros.h"
20+
#include "utility/modeling.h"
21+
#include "utility/utility.h"
22+
23+
namespace nnadapter {
24+
namespace operation {
25+
26+
NNADAPTER_EXPORT bool ValidateRoll(const core::Operation* operation) {
27+
return false;
28+
}
29+
30+
NNADAPTER_EXPORT int PrepareRoll(core::Operation* operation) {
31+
ROLL_OPERATION_EXTRACT_INPUTS_OUTPUTS
32+
33+
// Infer the shape and type of output operands
34+
CopyOperandTypeExceptQuantParams(&output_operand->type, input_operand->type);
35+
NNADAPTER_VLOG(5) << "output: " << OperandToString(output_operand);
36+
return NNADAPTER_NO_ERROR;
37+
}
38+
39+
NNADAPTER_EXPORT int ExecuteRoll(core::Operation* operation) {
40+
return NNADAPTER_FEATURE_NOT_SUPPORTED;
41+
}
42+
43+
} // namespace operation
44+
} // namespace nnadapter

lite/backends/nnadapter/nnadapter/src/utility/debug.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -563,6 +563,10 @@ NNADAPTER_EXPORT std::string Visualize(core::Model* model) {
563563
"score_threshold"};
564564
output_args = {"Out", "NmsRoisNum", "Index"};
565565
break;
566+
case NNADAPTER_ROLL:
567+
input_args = {"input", "shifts", "axes"};
568+
output_args = {"output"};
569+
break;
566570
default:
567571
if (operation->type < 0) {
568572
input_args.resize(input_count);
@@ -753,6 +757,7 @@ NNADAPTER_EXPORT std::string OperationTypeToString(
753757
NNADAPTER_TYPE_TO_STRING(RESIZE_NEAREST);
754758
NNADAPTER_TYPE_TO_STRING(RESIZE_LINEAR);
755759
NNADAPTER_TYPE_TO_STRING(ROI_ALIGN);
760+
NNADAPTER_TYPE_TO_STRING(ROLL);
756761
NNADAPTER_TYPE_TO_STRING(SHAPE);
757762
NNADAPTER_TYPE_TO_STRING(SIGMOID);
758763
NNADAPTER_TYPE_TO_STRING(SIN);

lite/kernels/nnadapter/converter/all.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -544,6 +544,7 @@ REGISTER_CONVERTER(sin,
544544
"huawei_ascend_npu,verisilicon_timvx");
545545
REGISTER_CONVERTER(cos, ConvertUnaryActivations, "huawei_ascend_npu");
546546
REGISTER_CONVERTER(silu, ConvertUnaryActivations, "huawei_ascend_npu");
547+
REGISTER_CONVERTER(roll, ConvertRoll, "huawei_ascend_npu");
547548
REGISTER_CONVERTER(elementwise_floordiv,
548549
ConvertElementwise,
549550
"huawei_ascend_npu,verisilicon_timvx");
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "lite/kernels/nnadapter/converter/converter.h"
16+
17+
namespace paddle {
18+
namespace lite {
19+
namespace kernels {
20+
namespace nnadapter {
21+
22+
int ConvertRoll(Converter* converter, OpInfo* op, Scope* scope) {
23+
// Input operand
24+
auto x_name = op->Input("X").front();
25+
auto x_scale_name = "X0_scale";
26+
std::vector<float> x_scales;
27+
if (op->HasInputScale(x_scale_name, true)) {
28+
x_scales = op->GetInputScale(x_scale_name, true);
29+
}
30+
auto input_operand = converter->AddInputOperand(scope, x_name, {}, x_scales);
31+
// Shifts
32+
NNAdapterOperand* shifts_operand = nullptr;
33+
if (HasInput(op, scope, "ShiftsTensor")) {
34+
auto shifts_name = op->Input("ShiftsTensor").front();
35+
shifts_operand = converter->AddInputOperand(scope, shifts_name);
36+
} else {
37+
std::vector<int64_t> shifts = op->GetAttr<std::vector<int64_t>>("shifts");
38+
shifts_operand = converter->AddConstantOperand(
39+
std::vector<int32_t>(shifts.begin(), shifts.end()));
40+
}
41+
// Axes
42+
std::vector<int64_t> axis = op->GetAttr<std::vector<int64_t>>("axis");
43+
auto axes_operand = converter->AddConstantOperand(
44+
std::vector<int32_t>(axis.begin(), axis.end()));
45+
// Output
46+
auto out_name = op->Output("Out").front();
47+
auto out_scale_name = "Out0_scale";
48+
std::vector<float> out_scales;
49+
if (op->HasOutputScale(out_scale_name, true)) {
50+
out_scales = op->GetOutputScale(out_scale_name, true);
51+
}
52+
auto output_operand = converter->AddOutputOperand(out_name, out_scales);
53+
// Roll operation
54+
converter->AddOperation(NNADAPTER_ROLL,
55+
{input_operand, shifts_operand, axes_operand},
56+
{output_operand});
57+
return NO_ERROR;
58+
}
59+
60+
} // namespace nnadapter
61+
} // namespace kernels
62+
} // namespace lite
63+
} // namespace paddle

0 commit comments

Comments
 (0)