Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ class NCHW2NHWCDataLayoutConverter {
void ConvertSoftmax(core::Operation* operation);
void ConvertSplit(core::Operation* operation);
void ConvertSqueeze(core::Operation* operation);
void ConvertStack(core::Operation* operation);
void ConvertTranspose(core::Operation* operation);
void ConvertMatMul(core::Operation* operation);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -704,6 +704,71 @@ void NCHW2NHWCDataLayoutConverter::ConvertSoftmax(core::Operation* operation) {
SetPermutation(output_operand, input_permutation);
}

void NCHW2NHWCDataLayoutConverter::ConvertSqueeze(core::Operation* operation) {
  // Converts a squeeze operation under the NCHW->NHWC data layout pass.
  //
  // Example: input shape (1, 3, 1, 5), axes (0, 2), input permutation
  // (0, 2, 3, 1). The algorithm:
  // 1. For each squeezed axis, remove that dimension value from both the
  //    identity permutation and the input permutation:
  //    input_permutation (0, 2, 3, 1) -> (3, 1),
  //    identity_permutation (0, 1, 2, 3) -> (1, 3).
  // 2. Compare the two reduced permutations. If they are identical no
  //    transpose is needed; otherwise (e.g. (3, 1) vs (1, 3)) derive the
  //    output permutation that maps between them (here (1, 0)) and transpose
  //    the output operand accordingly.
  auto& input_operands = operation->input_operands;
  auto& output_operands = operation->output_operands;
  auto input_count = input_operands.size();
  auto output_count = output_operands.size();
  NNADAPTER_CHECK_EQ(input_count, 2);
  NNADAPTER_CHECK_EQ(output_count, 1);
  auto input_operand = input_operands[0];
  auto input_dimensions_count = input_operand->type.dimensions.count;
  auto output_operand = output_operands[0];
  auto output_dimensions_count = output_operand->type.dimensions.count;
  auto axes_operand = input_operands[1];
  // Recalculate the perm according to the dimorder vector of the input operand
  auto input_permutation = GetPermutation(input_operand);
  std::vector<int32_t> identity_permutation =
      IdentityPermutation(input_dimensions_count);
  // Two cases for the axes of squeeze:
  // 1. axes_operand is non-null and its data is non-empty: squeeze only the
  //    listed axes.
  // 2. axes_operand is null or empty: every dimension of size 1 is squeezed,
  //    so the NCHW->NHWC conversion is skipped and the output keeps an
  //    identity permutation.
  if (axes_operand && (axes_operand->length / sizeof(int32_t)) > 0) {
    auto axes_count = axes_operand->length / sizeof(int32_t);
    auto axes_data = reinterpret_cast<int32_t*>(axes_operand->buffer);
    // Recalculate the axes according to the dimorder vector of the input
    // operand
    for (size_t i = 0; i < axes_count; i++) {
      if (axes_data[i] < 0) {
        // Normalize negative axes in place (the buffer is updated so later
        // consumers see non-negative axes).
        axes_data[i] += input_dimensions_count;
      }
      // Delete the dimension corresponding to the axis of the
      // identity_permutation
      for (auto it = identity_permutation.begin();
           it != identity_permutation.end();) {
        if (*it == axes_data[i]) {
          it = identity_permutation.erase(it);
        } else {
          ++it;
        }
      }
      // Delete the dimension corresponding to the axis of the input_permutation
      // NOTE(review): the return value of TransposeAxis() is discarded, so
      // this call appears to have no effect unless it mutates an argument --
      // confirm whether it should be removed or its result used in the
      // comparison below.
      TransposeAxis(axes_data[i], input_permutation);
      for (auto it = input_permutation.begin();
           it != input_permutation.end();) {
        if (*it == axes_data[i]) {
          it = input_permutation.erase(it);
        } else {
          ++it;
        }
      }
    }
    // Calculate the distance between current data layout and origin data layout
    std::vector<int32_t> output_permutation;
    for (auto identity_data : identity_permutation) {
      int32_t index = std::distance(input_permutation.begin(),
                                    std::find(input_permutation.begin(),
                                              input_permutation.end(),
                                              identity_data));
      output_permutation.push_back(index);
    }
    TransposeOperand(output_operand, output_permutation);
    SetPermutation(output_operand, output_permutation);
  } else {
    // Skip NCHW2NHWC conversion
    SetPermutation(output_operand,
                   IdentityPermutation(output_dimensions_count));
  }
}

void NCHW2NHWCDataLayoutConverter::ConvertSplit(core::Operation* operation) {
auto& input_operands = operation->input_operands;
auto& output_operands = operation->output_operands;
Expand All @@ -727,6 +792,70 @@ void NCHW2NHWCDataLayoutConverter::ConvertSplit(core::Operation* operation) {
}
}

void NCHW2NHWCDataLayoutConverter::ConvertStack(core::Operation* operation) {
  // Converts a stack operation under the NCHW->NHWC data layout pass.
  //
  // Reuses the logic of ConvertConcat: all non-constant inputs are forced to
  // share a reference permutation (inserting transposes where needed) and
  // constant inputs are transposed in place. The difference is that stack
  // creates a new dimension at `axis`, so after alignment the reference
  // permutation gains one entry: every permutation value >= *axis is
  // incremented by one, then *axis itself is inserted at position *axis,
  // which keeps the transpose order of the original dimensions unchanged.
  auto& input_operands = operation->input_operands;
  auto& output_operands = operation->output_operands;
  auto input_count = input_operands.size();
  auto output_count = output_operands.size();
  NNADAPTER_CHECK_GE(input_count, 2);
  NNADAPTER_CHECK_EQ(output_count, 1);
  // The last input operand holds the axis; negative values are normalized
  // in place against the rank of the first input.
  auto* axis =
      reinterpret_cast<int32_t*>(input_operands[input_count - 1]->buffer);
  if (*axis < 0) {
    *axis += input_operands[0]->type.dimensions.count;
  }
  auto output_operand = output_operands[0];
  auto output_dimensions_count = output_operand->type.dimensions.count;
  // Force to align the dimorder vector of all of input operands: pick the
  // longest permutation among the non-constant inputs as the reference.
  std::vector<int32_t> reference_permutation;
  for (size_t i = 0; i < input_count - 1; i++) {
    auto input_operand = input_operands[i];
    if (!IsConstantOperand(input_operand)) {
      auto input_permutation = GetPermutation(input_operand);
      if (input_permutation.size() > reference_permutation.size()) {
        reference_permutation = input_permutation;
      }
    }
  }
  if (reference_permutation.empty()) {
    // All of input operands are constant
    SetPermutation(output_operand,
                   IdentityPermutation(output_dimensions_count));
  } else {
    for (size_t i = 0; i < input_count - 1; i++) {
      auto input_operand = input_operands[i];
      if (!IsConstantOperand(input_operand)) {
        // Insert a transpose so this input matches the reference layout.
        auto input_permutation = GetPermutation(input_operand);
        auto transpose_input_permutation = MultiplyPermutation(
            InversePermutation(input_permutation), reference_permutation);
        if (!IsIdentityPermutation(transpose_input_permutation)) {
          auto transpose_input_operand = AppendTransposeOperation(
              model_, input_operand, transpose_input_permutation);
          UpdateOperationInputOperands(
              {operation}, input_operand, transpose_input_operand);
          SetPermutation(transpose_input_operand, reference_permutation);
        }
      } else {
        if (IsIdentityPermutation(reference_permutation)) {
          // Ignore
        } else {
          // Constant inputs are transposed in place instead of via a
          // transpose operation.
          NNADAPTER_CHECK_EQ(input_operand->type.dimensions.count,
                             reference_permutation.size());
          TransposeOperand(input_operand, reference_permutation);
        }
      }
    }
    // Stack adds a dimension at *axis: shift permutation values and insert
    // the new dimension so the relative transpose order is preserved.
    for (auto& perm_data : reference_permutation) {
      if (perm_data >= *axis) perm_data += 1;
    }
    reference_permutation.insert(reference_permutation.begin() + *axis, *axis);
    TransposeOperand(output_operand, reference_permutation);
    SetPermutation(output_operand, reference_permutation);
  }
}

void NCHW2NHWCDataLayoutConverter::ConvertTranspose(
core::Operation* operation) {
auto& input_operands = operation->input_operands;
Expand Down Expand Up @@ -856,9 +985,15 @@ void NCHW2NHWCDataLayoutConverter::Apply(core::Model* model) {
case NNADAPTER_SOFTMAX:
ConvertSoftmax(operation);
break;
case NNADAPTER_SQUEEZE:
ConvertSqueeze(operation);
break;
case NNADAPTER_SPLIT:
ConvertSplit(operation);
break;
case NNADAPTER_STACK:
ConvertStack(operation);
break;
case NNADAPTER_TRANSPOSE:
ConvertTranspose(operation);
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,8 @@ NNADAPTER_EXPORT void ConvertQuantizationSymmToAsymm(core::Model* model) {
ConvertOperandSymmToAsymm(output_operands[0], 128);
PropagateAsymmZeroPoint(input_operands[0], output_operands[0]);
} break;
case NNADAPTER_CONCAT: {
case NNADAPTER_CONCAT:
case NNADAPTER_STACK: {
NNADAPTER_CHECK_GE(input_count, 2);
for (int i = 0; i < input_count - 1; i++) {
ConvertOperandSymmToAsymm(input_operands[i], 128);
Expand Down
41 changes: 21 additions & 20 deletions lite/kernels/nnadapter/converter/all.h
Original file line number Diff line number Diff line change
Expand Up @@ -307,20 +307,21 @@ REGISTER_CONVERTER(slice,
REGISTER_CONVERTER(strided_slice,
ConvertStridedSlice,
"huawei_ascend_npu,huawei_kirin_npu,nvidia_tensorrt");
// Devices supporting the squeeze op. The device list is a single
// comma-separated string built from adjacent string literals, so every
// fragment except the last must end with ',' — the one after
// "kunlunxin_xtcl" was missing, yielding the invalid device name
// "kunlunxin_xtclcambricon_mlu".
REGISTER_CONVERTER(
    squeeze,
    ConvertSqueeze,
    "huawei_ascend_npu,verisilicon_timvx,kunlunxin_xtcl,"
    "cambricon_mlu,huawei_kirin_npu,nvidia_tensorrt,intel_openvino");
REGISTER_CONVERTER(
squeeze2,
ConvertSqueeze,
"huawei_ascend_npu,verisilicon_timvx,kunlunxin_xtcl,"
"cambricon_mlu,huawei_kirin_npu,nvidia_tensorrt,intel_openvino");
// Devices supporting the squeeze op. The device list is a single
// comma-separated string built from adjacent string literals, so every
// fragment except the last must end with ',' — the one after
// "kunlunxin_xtcl" was missing, yielding the invalid device name
// "kunlunxin_xtclcambricon_mlu" (compare the squeeze2 registration, which
// has the comma).
REGISTER_CONVERTER(squeeze,
                   ConvertSqueeze,
                   "huawei_ascend_npu,verisilicon_timvx,kunlunxin_xtcl,"
                   "cambricon_mlu,huawei_kirin_npu,nvidia_tensorrt,intel_"
                   "openvino,qualcomm_qnn");
REGISTER_CONVERTER(squeeze2,
ConvertSqueeze,
"huawei_ascend_npu,verisilicon_timvx,kunlunxin_xtcl,"
"cambricon_mlu,huawei_kirin_npu,nvidia_tensorrt,intel_"
"openvino,qualcomm_qnn");
REGISTER_CONVERTER(range, ConvertRange, "huawei_ascend_npu,intel_openvino");
REGISTER_CONVERTER(stack,
ConvertStack,
"huawei_ascend_npu,nvidia_tensorrt,intel_openvino");
REGISTER_CONVERTER(
stack,
ConvertStack,
"huawei_ascend_npu,nvidia_tensorrt,intel_openvino,qualcomm_qnn");
REGISTER_CONVERTER(
fill_constant,
ConvertFillConstant,
Expand All @@ -331,18 +332,18 @@ REGISTER_CONVERTER(fill_any_like,
REGISTER_CONVERTER(fill_constant_batch_size_like,
ConvertFillConstantBatchSizeLike,
"huawei_ascend_npu,verisilicon_timvx,intel_openvino");
REGISTER_CONVERTER(
concat,
ConvertConcat,
"rockchip_npu,mediatek_apu,huawei_kirin_npu,huawei_ascend_"
"npu,amlogic_npu,verisilicon_timvx,kunlunxin_xtcl,cambricon_"
"mlu,android_nnapi,nvidia_tensorrt,intel_openvino,eeasytech_npu");
REGISTER_CONVERTER(concat,
ConvertConcat,
"rockchip_npu,mediatek_apu,huawei_kirin_npu,huawei_ascend_"
"npu,amlogic_npu,verisilicon_timvx,kunlunxin_xtcl,cambricon_"
"mlu,android_nnapi,nvidia_tensorrt,intel_openvino,eeasytech_"
"npu,qualcomm_qnn");
REGISTER_CONVERTER(
split,
ConvertSplit,
"huawei_kirin_npu,huawei_ascend_npu,kunlunxin_xtcl,"
"verisilicon_timvx,cambricon_mlu,nvidia_tensorrt,intel_openvino,"
"eeasytech_npu");
"eeasytech_npu,qualcomm_qnn");
REGISTER_CONVERTER(calib, ConvertCalib, "huawei_ascend_npu,cambricon_mlu");
REGISTER_CONVERTER(nearest_interp,
ConvertInterpolate,
Expand Down
3 changes: 3 additions & 0 deletions lite/tests/kernels/concat_compute_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,9 @@ TEST(Concat, precision) {
#elif defined(NNADAPTER_WITH_INTEL_OPENVINO)
abs_error = 1e-5;
use_axis_tensor = std::vector<bool>{false};
#elif defined(NNADAPTER_WITH_QUALCOMM_QNN)
abs_error = 2e-5;
use_axis_tensor = std::vector<bool>{false};
#else
return;
#endif
Expand Down
8 changes: 8 additions & 0 deletions lite/tests/kernels/split_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,14 @@ TEST(Split_test, precision) {
TestSplitSections(place, abs_error);
TestSplitAxisTensor(place, abs_error);
TestSplitSectionsTensorList(place, abs_error);
#elif defined(NNADAPTER_WITH_QUALCOMM_QNN)
abs_error = 1e-5;
TestSplitBase<float>(place, abs_error);
TestSplitAxis(place, abs_error);
TestSplitNum(place, abs_error);
TestSplitSections(place, abs_error);
TestSplitAxisTensor(place, abs_error);
TestSplitSectionsTensorList(place, abs_error);
#else
return;
#endif
Expand Down
4 changes: 4 additions & 0 deletions lite/tests/kernels/squeeze_compute_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,8 @@ TEST(squeeze, precision) {
abs_error = 5e-2;
#elif defined(NNADAPTER_WITH_INTEL_OPENVINO)
abs_error = 1e-5;
#elif defined(NNADAPTER_WITH_QUALCOMM_QNN)
abs_error = 1e-5;
#else
return;
#endif
Expand Down Expand Up @@ -295,6 +297,8 @@ TEST(squeeze2, precision) {
abs_error = 5e-2;
#elif defined(NNADAPTER_WITH_INTEL_OPENVINO)
abs_error = 1e-5;
#elif defined(NNADAPTER_WITH_QUALCOMM_QNN)
abs_error = 1e-5;
#else
return;
#endif
Expand Down
2 changes: 2 additions & 0 deletions lite/tests/kernels/stack_compute_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ TEST(Stack, precision) {
abs_error = 2e-5;
#elif defined(NNADAPTER_WITH_INTEL_OPENVINO)
abs_error = 1e-5;
#elif defined(NNADAPTER_WITH_QUALCOMM_QNN)
abs_error = 1e-5;
#else
return;
#endif
Expand Down