diff --git a/lite/kernels/arm/CMakeLists.txt b/lite/kernels/arm/CMakeLists.txt index fab2964877a..afa3b3bec33 100644 --- a/lite/kernels/arm/CMakeLists.txt +++ b/lite/kernels/arm/CMakeLists.txt @@ -80,8 +80,8 @@ add_kernel(generate_proposals_v2_compute_arm ARM extra SRCS generate_proposals_v add_kernel(roi_align_compute_arm ARM extra SRCS roi_align_compute.cc DEPS ${lite_kernel_deps} math_arm) add_kernel(box_clip_compute_arm ARM extra SRCS box_clip_compute.cc DEPS ${lite_kernel_deps} math_arm) add_kernel(assign_value_compute_arm ARM basic SRCS assign_value_compute.cc DEPS ${lite_kernel_deps} math_arm) -add_kernel(collect_fpn_proposals_compute_arm ARM extra SRCS collect_fpn_proposals_compute.cc DEPS ${lite_kernel_deps} math_arm) -add_kernel(distribute_fpn_proposals_compute_arm ARM extra SRCS distribute_fpn_proposals_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(collect_fpn_proposals_compute_arm ARM extra SRCS collect_fpn_proposals_compute.cc DEPS ${lite_kernel_deps} collect_fpn_proposals_compute_host) +add_kernel(distribute_fpn_proposals_compute_arm ARM extra SRCS distribute_fpn_proposals_compute.cc DEPS ${lite_kernel_deps} distribute_fpn_proposals_compute_host) add_kernel(clip_compute_arm ARM extra SRCS clip_compute.cc DEPS ${lite_kernel_deps} math_arm) add_kernel(pixel_shuffle_compute_arm ARM extra SRCS pixel_shuffle_compute.cc DEPS ${lite_kernel_deps} math_arm) add_kernel(scatter_compute_arm ARM extra SRCS scatter_compute.cc DEPS ${lite_kernel_deps} math_arm) diff --git a/lite/kernels/arm/collect_fpn_proposals_compute.cc b/lite/kernels/arm/collect_fpn_proposals_compute.cc index ae2981e5410..bb609cd3058 100644 --- a/lite/kernels/arm/collect_fpn_proposals_compute.cc +++ b/lite/kernels/arm/collect_fpn_proposals_compute.cc @@ -12,168 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "lite/kernels/arm/collect_fpn_proposals_compute.h" -#include -#include -#include -#include "lite/backends/arm/math/funcs.h" -#include "lite/core/op_registry.h" -#include "lite/core/tensor.h" -#include "lite/core/type_system.h" - -namespace paddle { -namespace lite { -namespace kernels { -namespace arm { - -struct ScoreWithID { - float score; - int batch_id; - int index; - int level; - ScoreWithID() { - batch_id = -1; - index = -1; - level = -1; - } - ScoreWithID(float score_, int batch_id_, int index_, int level_) { - score = score_; - batch_id = batch_id_; - index = index_; - level = level_; - } -}; - -static inline bool CompareByScore(ScoreWithID a, ScoreWithID b) { - return a.score >= b.score; -} - -static inline bool CompareByBatchid(ScoreWithID a, ScoreWithID b) { - return a.batch_id < b.batch_id; -} - -void CollectFpnProposalsCompute::Run() { - auto& param = Param(); - auto multi_layer_rois = param.multi_level_rois; - auto multi_layer_scores = param.multi_level_scores; - auto* fpn_rois = param.fpn_rois; - int post_nms_topN = param.post_nms_topN; - - if (multi_layer_rois.size() != multi_layer_scores.size()) { - LOG(FATAL) << "multi_layer_rois.size() should be equan to " - "multi_layer_scores.size()"; - } - - size_t num_fpn_level = multi_layer_rois.size(); - std::vector integral_of_all_rois(num_fpn_level + 1, 0); - int num_size = param.multi_rois_num.size(); - for (size_t i = 0; i < num_fpn_level; ++i) { - int all_rois = 0; - if (num_size == 0) { - auto cur_rois_lod = multi_layer_rois[i]->lod().back(); - all_rois = cur_rois_lod[cur_rois_lod.size() - 1]; - } else { - const int* cur_rois_num = param.multi_rois_num[i]->data(); - all_rois = std::accumulate( - cur_rois_num, cur_rois_num + param.multi_rois_num[i]->numel(), 0); - } - integral_of_all_rois[i + 1] = integral_of_all_rois[i] + all_rois; - } - const int batch_size = (num_size == 0) - ? multi_layer_rois[0]->lod().back().size() - 1 - : param.multi_rois_num[0]->numel(); - std::vector scores_of_all_rois( - integral_of_all_rois[num_fpn_level], ScoreWithID()); - for (int i = 0; i < num_fpn_level; ++i) { - const float* cur_level_scores = multi_layer_scores[i]->data(); - int cur_level_num = integral_of_all_rois[i + 1] - integral_of_all_rois[i]; - auto cur_scores_lod = multi_layer_scores[i]->lod().back(); - int cur_batch_id = 0; - int pre_num = 0; - for (int j = 0; j < cur_level_num; ++j) { - if (num_size == 0) { - auto cur_scores_lod = multi_layer_scores[i]->lod().back(); - if (static_cast(j) >= cur_scores_lod[cur_batch_id + 1]) { - cur_batch_id++; - } - } else { - const int* rois_num_data = param.multi_rois_num[i]->data(); - if (j >= pre_num + rois_num_data[cur_batch_id]) { - pre_num += rois_num_data[cur_batch_id]; - cur_batch_id++; - } - } - int cur_index = j + integral_of_all_rois[i]; - scores_of_all_rois[cur_index].score = cur_level_scores[j]; - scores_of_all_rois[cur_index].index = j; - scores_of_all_rois[cur_index].level = i; - scores_of_all_rois[cur_index].batch_id = cur_batch_id; - } - } - - // keep top post_nms_topN rois, sort the rois by the score - if (post_nms_topN > integral_of_all_rois[num_fpn_level]) { - post_nms_topN = integral_of_all_rois[num_fpn_level]; - } - std::stable_sort( - scores_of_all_rois.begin(), scores_of_all_rois.end(), CompareByScore); - scores_of_all_rois.resize(post_nms_topN); - // sort by batch id - std::stable_sort( - scores_of_all_rois.begin(), scores_of_all_rois.end(), CompareByBatchid); - // create a pointer array - std::vector multi_fpn_rois_data(num_fpn_level); - for (int i = 0; i < num_fpn_level; ++i) { - multi_fpn_rois_data[i] = multi_layer_rois[i]->data(); - } - - // initialize the outputs - const int kBoxDim = 4; - auto fpn_rois_data = fpn_rois->mutable_data(); - std::vector lod0(1, 0); - int cur_batch_id = 0; - std::vector num_per_batch; - int pre_idx = 0; - int cur_num = 0; - for (int i = 0; i < post_nms_topN; ++i) { - int cur_fpn_level = scores_of_all_rois[i].level; - int cur_level_index = scores_of_all_rois[i].index; - std::memcpy(fpn_rois_data, - multi_fpn_rois_data[cur_fpn_level] + cur_level_index * kBoxDim, - kBoxDim * sizeof(float)); - fpn_rois_data += kBoxDim; - if (scores_of_all_rois[i].batch_id != cur_batch_id) { - cur_batch_id = scores_of_all_rois[i].batch_id; - lod0.emplace_back(i); - cur_num = i - pre_idx; - pre_idx = i; - num_per_batch.emplace_back(cur_num); - } - } - num_per_batch.emplace_back(post_nms_topN - pre_idx); - if (param.rois_num) { - int* rois_num_data = param.rois_num->mutable_data(); - for (int i = 0; i < batch_size; i++) { - rois_num_data[i] = num_per_batch[i]; - } - } - lod0.emplace_back(post_nms_topN); - lite::LoD lod; - lod.emplace_back(lod0); - fpn_rois->set_lod(lod); - return; -} - -} // namespace arm -} // namespace kernels -} // namespace lite -} // namespace paddle +#include "lite/kernels/host/collect_fpn_proposals_compute.h" REGISTER_LITE_KERNEL(collect_fpn_proposals, kARM, kFloat, kNCHW, - paddle::lite::kernels::arm::CollectFpnProposalsCompute, + paddle::lite::kernels::host::CollectFpnProposalsCompute, def) .BindInput("MultiLevelRois", {LiteType::GetTensorTy(TARGET(kARM))}) .BindInput("MultiLevelScores", {LiteType::GetTensorTy(TARGET(kARM))}) diff --git a/lite/kernels/arm/distribute_fpn_proposals_compute.cc b/lite/kernels/arm/distribute_fpn_proposals_compute.cc index eb8df0650c1..ec0e0df7fb5 100644 --- a/lite/kernels/arm/distribute_fpn_proposals_compute.cc +++ b/lite/kernels/arm/distribute_fpn_proposals_compute.cc @@ -12,167 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "lite/kernels/arm/distribute_fpn_proposals_compute.h" -#include -#include -#include "lite/backends/arm/math/funcs.h" -#include "lite/core/op_registry.h" -#include "lite/core/tensor.h" -#include "lite/core/type_system.h" - -namespace paddle { -namespace lite { -namespace kernels { -namespace arm { - -const int kBoxDim = 4; - -template -static inline T BBoxArea(const T* box, bool normalized) { - if (box[2] < box[0] || box[3] < box[1]) { - // If coordinate values are is invalid - // (e.g. xmax < xmin or ymax < ymin), return 0. - return static_cast(0.); - } else { - const T w = box[2] - box[0]; - const T h = box[3] - box[1]; - if (normalized) { - return w * h; - } else { - // If coordinate values are not within range [0, 1]. - return (w + 1) * (h + 1); - } - } -} - -inline std::vector GetLodFromRoisNum(const Tensor* rois_num) { - std::vector rois_lod; - auto* rois_num_data = rois_num->data(); - - rois_lod.push_back(static_cast(0)); - for (int i = 0; i < rois_num->numel(); ++i) { - rois_lod.push_back(rois_lod.back() + - static_cast(rois_num_data[i])); - } - return rois_lod; -} - -void DistributeFpnProposalsCompute::Run() { - auto& param = Param(); - const lite::Tensor* fpn_rois = param.fpn_rois; - std::vector multi_fpn_rois = param.multi_fpn_rois; - lite::Tensor* restore_index = param.restore_index; - int min_level = param.min_level; - int max_level = param.max_level; - int refer_level = param.refer_level; - int refer_scale = param.refer_scale; - int num_level = max_level - min_level + 1; - - std::vector fpn_rois_lod; - int fpn_rois_num; - if (param.rois_num) { - fpn_rois_lod = GetLodFromRoisNum(param.rois_num); - } else { - fpn_rois_lod = fpn_rois->lod().back(); - } - fpn_rois_num = fpn_rois_lod[fpn_rois_lod.size() - 1]; - - std::vector target_level; - // record the number of rois in each level - std::vector num_rois_level(num_level, 0); - std::vector num_rois_level_integral(num_level + 1, 0); - for (size_t i = 0; i < fpn_rois_lod.size() - 1; ++i) { - auto fpn_rois_slice = - fpn_rois->Slice(static_cast(fpn_rois_lod[i]), - static_cast(fpn_rois_lod[i + 1])); - const float* rois_data = fpn_rois_slice.data(); - for (int j = 0; j < fpn_rois_slice.dims()[0]; ++j) { - // get the target level of current rois - float roi_scale = std::sqrt(BBoxArea(rois_data, false)); - int tgt_lvl = - std::floor(log2(roi_scale / refer_scale + static_cast(1e-6)) + - refer_level); - tgt_lvl = std::min(max_level, std::max(tgt_lvl, min_level)); - target_level.push_back(tgt_lvl); - num_rois_level[tgt_lvl - min_level]++; - rois_data += kBoxDim; - } - } - // define the output rois - // pointer which point to each level fpn rois - std::vector multi_fpn_rois_data(num_level); - // lod0 which will record the offset information of each level rois - std::vector> multi_fpn_rois_lod0; - for (int i = 0; i < num_level; ++i) { - // allocate memory for each level rois - multi_fpn_rois[i]->Resize({num_rois_level[i], kBoxDim}); - multi_fpn_rois_data[i] = multi_fpn_rois[i]->mutable_data(); - std::vector lod0(1, 0); - multi_fpn_rois_lod0.push_back(lod0); - // statistic start point for each level rois - num_rois_level_integral[i + 1] = - num_rois_level_integral[i] + num_rois_level[i]; - } - restore_index->Resize({fpn_rois_num, 1}); - int* restore_index_data = restore_index->mutable_data(); - std::vector restore_index_inter(fpn_rois_num, -1); - // distribute the rois into different fpn level by target level - for (size_t i = 0; i < fpn_rois_lod.size() - 1; ++i) { - Tensor fpn_rois_slice = - fpn_rois->Slice(static_cast(fpn_rois_lod[i]), - static_cast(fpn_rois_lod[i + 1])); - const float* rois_data = fpn_rois_slice.data(); - size_t cur_offset = fpn_rois_lod[i]; - // std::vector lod_offset[num_level]; - for (int j = 0; j < num_level; j++) { - multi_fpn_rois_lod0[j].push_back(multi_fpn_rois_lod0[j][i]); - } - for (int j = 0; j < fpn_rois_slice.dims()[0]; ++j) { - int lvl = target_level[cur_offset + j]; - memcpy(multi_fpn_rois_data[lvl - min_level], - rois_data, - kBoxDim * sizeof(float)); - multi_fpn_rois_data[lvl - min_level] += kBoxDim; - int index_in_shuffle = num_rois_level_integral[lvl - min_level] + - multi_fpn_rois_lod0[lvl - min_level][i + 1]; - restore_index_inter[index_in_shuffle] = cur_offset + j; - multi_fpn_rois_lod0[lvl - min_level][i + 1]++; - rois_data += kBoxDim; - } - } - for (int i = 0; i < fpn_rois_num; ++i) { - restore_index_data[restore_index_inter[i]] = i; - } - if (param.multi_rois_num.size() > 0) { - int batch_size = fpn_rois_lod.size() - 1; - for (int i = 0; i < num_level; ++i) { - param.multi_rois_num[i]->Resize({batch_size}); - int* rois_num_data = param.multi_rois_num[i]->mutable_data(); - for (int j = 0; j < batch_size; ++j) { - rois_num_data[j] = static_cast(multi_fpn_rois_lod0[i][j + 1] - - multi_fpn_rois_lod0[i][j]); - } - } - } - // merge lod information into LoDTensor - for (int i = 0; i < num_level; ++i) { - lite::LoD lod; - lod.emplace_back(multi_fpn_rois_lod0[i]); - multi_fpn_rois[i]->set_lod(lod); - } - return; -} - -} // namespace arm -} // namespace kernels -} // namespace lite -} // namespace paddle +#include "lite/kernels/host/distribute_fpn_proposals_compute.h" REGISTER_LITE_KERNEL(distribute_fpn_proposals, kARM, kFloat, kNCHW, - paddle::lite::kernels::arm::DistributeFpnProposalsCompute, + paddle::lite::kernels::host::DistributeFpnProposalsCompute, def) .BindInput("FpnRois", {LiteType::GetTensorTy(TARGET(kARM))}) .BindInput("RoisNum", diff --git a/lite/kernels/host/CMakeLists.txt b/lite/kernels/host/CMakeLists.txt index 7f861dcfb00..16d6d90ce22 100644 --- a/lite/kernels/host/CMakeLists.txt +++ b/lite/kernels/host/CMakeLists.txt @@ -75,7 +75,9 @@ add_kernel(linspace_compute_host Host extra SRCS linspace_compute.cc DEPS ${lite add_kernel(beam_search_decode_compute_host Host extra SRCS beam_search_decode_compute.cc DEPS ${lite_kernel_deps}) add_kernel(roi_perspective_transform_compute_host Host extra SRCS roi_perspective_transform_compute.cc DEPS ${lite_kernel_deps}) add_kernel(lod_reset_compute_host Host extra SRCS lod_reset_compute.cc DEPS ${lite_kernel_deps}) -add_kernel(argsort Host extra SRCS argsort_compute.cc DEPS ${lite_kernel_deps}) +add_kernel(argsort_compute_host Host extra SRCS argsort_compute.cc DEPS ${lite_kernel_deps}) +add_kernel(distribute_fpn_proposals_compute_host Host extra SRCS distribute_fpn_proposals_compute.cc DEPS ${lite_kernel_deps}) +add_kernel(collect_fpn_proposals_compute_host Host extra SRCS collect_fpn_proposals_compute.cc DEPS ${lite_kernel_deps}) if(LITE_BUILD_EXTRA AND LITE_WITH_x86) lite_cc_test(test_where_index_compute_host SRCS where_index_compute.cc DEPS where_index_compute_host) diff --git a/lite/kernels/host/collect_fpn_proposals_compute.cc b/lite/kernels/host/collect_fpn_proposals_compute.cc new file mode 100644 index 00000000000..d13f53799af --- /dev/null +++ b/lite/kernels/host/collect_fpn_proposals_compute.cc @@ -0,0 +1,180 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "lite/kernels/host/collect_fpn_proposals_compute.h" +#include +#include +#include + +namespace paddle { +namespace lite { +namespace kernels { +namespace host { + +struct ScoreWithID { + float score; + int batch_id; + int index; + int level; + ScoreWithID() { + batch_id = -1; + index = -1; + level = -1; + } + ScoreWithID(float score_, int batch_id_, int index_, int level_) { + score = score_; + batch_id = batch_id_; + index = index_; + level = level_; + } +}; + +static inline bool CompareByScore(ScoreWithID a, ScoreWithID b) { + return a.score >= b.score; +} + +static inline bool CompareByBatchid(ScoreWithID a, ScoreWithID b) { + return a.batch_id < b.batch_id; +} + +void CollectFpnProposalsCompute::Run() { + auto& param = Param(); + auto multi_layer_rois = param.multi_level_rois; + auto multi_layer_scores = param.multi_level_scores; + auto* fpn_rois = param.fpn_rois; + int post_nms_topN = param.post_nms_topN; + + if (multi_layer_rois.size() != multi_layer_scores.size()) { + LOG(FATAL) << "multi_layer_rois.size() should be equan to " + "multi_layer_scores.size()"; + } + + size_t num_fpn_level = multi_layer_rois.size(); + std::vector integral_of_all_rois(num_fpn_level + 1, 0); + int num_size = param.multi_rois_num.size(); + for (size_t i = 0; i < num_fpn_level; ++i) { + int all_rois = 0; + if (num_size == 0) { + auto cur_rois_lod = multi_layer_rois[i]->lod().back(); + all_rois = cur_rois_lod[cur_rois_lod.size() - 1]; + } else { + const int* cur_rois_num = param.multi_rois_num[i]->data(); + all_rois = std::accumulate( + cur_rois_num, cur_rois_num + param.multi_rois_num[i]->numel(), 0); + } + integral_of_all_rois[i + 1] = integral_of_all_rois[i] + all_rois; + } + const int batch_size = (num_size == 0) + ? multi_layer_rois[0]->lod().back().size() - 1 + : param.multi_rois_num[0]->numel(); + std::vector scores_of_all_rois( + integral_of_all_rois[num_fpn_level], ScoreWithID()); + for (int i = 0; i < num_fpn_level; ++i) { + const float* cur_level_scores = multi_layer_scores[i]->data(); + int cur_level_num = integral_of_all_rois[i + 1] - integral_of_all_rois[i]; + auto cur_scores_lod = multi_layer_scores[i]->lod().back(); + int cur_batch_id = 0; + int pre_num = 0; + for (int j = 0; j < cur_level_num; ++j) { + if (num_size == 0) { + auto cur_scores_lod = multi_layer_scores[i]->lod().back(); + if (static_cast(j) >= cur_scores_lod[cur_batch_id + 1]) { + cur_batch_id++; + } + } else { + const int* rois_num_data = param.multi_rois_num[i]->data(); + if (j >= pre_num + rois_num_data[cur_batch_id]) { + pre_num += rois_num_data[cur_batch_id]; + cur_batch_id++; + } + } + int cur_index = j + integral_of_all_rois[i]; + scores_of_all_rois[cur_index].score = cur_level_scores[j]; + scores_of_all_rois[cur_index].index = j; + scores_of_all_rois[cur_index].level = i; + scores_of_all_rois[cur_index].batch_id = cur_batch_id; + } + } + + // keep top post_nms_topN rois, sort the rois by the score + if (post_nms_topN > integral_of_all_rois[num_fpn_level]) { + post_nms_topN = integral_of_all_rois[num_fpn_level]; + } + std::stable_sort( + scores_of_all_rois.begin(), scores_of_all_rois.end(), CompareByScore); + scores_of_all_rois.resize(post_nms_topN); + // sort by batch id + std::stable_sort( + scores_of_all_rois.begin(), scores_of_all_rois.end(), CompareByBatchid); + // create a pointer array + std::vector multi_fpn_rois_data(num_fpn_level); + for (int i = 0; i < num_fpn_level; ++i) { + multi_fpn_rois_data[i] = multi_layer_rois[i]->data(); + } + + // initialize the outputs + const int kBoxDim = 4; + auto fpn_rois_data = fpn_rois->mutable_data(); + std::vector lod0(1, 0); + int cur_batch_id = 0; + std::vector num_per_batch; + int pre_idx = 0; + int cur_num = 0; + for (int i = 0; i < post_nms_topN; ++i) { + int cur_fpn_level = scores_of_all_rois[i].level; + int cur_level_index = scores_of_all_rois[i].index; + std::memcpy(fpn_rois_data, + multi_fpn_rois_data[cur_fpn_level] + cur_level_index * kBoxDim, + kBoxDim * sizeof(float)); + fpn_rois_data += kBoxDim; + if (scores_of_all_rois[i].batch_id != cur_batch_id) { + cur_batch_id = scores_of_all_rois[i].batch_id; + lod0.emplace_back(i); + cur_num = i - pre_idx; + pre_idx = i; + num_per_batch.emplace_back(cur_num); + } + } + num_per_batch.emplace_back(post_nms_topN - pre_idx); + if (param.rois_num) { + int* rois_num_data = param.rois_num->mutable_data(); + for (int i = 0; i < batch_size; i++) { + rois_num_data[i] = num_per_batch[i]; + } + } + lod0.emplace_back(post_nms_topN); + lite::LoD lod; + lod.emplace_back(lod0); + fpn_rois->set_lod(lod); + return; +} + +} // namespace host +} // namespace kernels +} // namespace lite +} // namespace paddle + +REGISTER_LITE_KERNEL(collect_fpn_proposals, + kHost, + kFloat, + kNCHW, + paddle::lite::kernels::host::CollectFpnProposalsCompute, + def) + .BindInput("MultiLevelRois", {LiteType::GetTensorTy(TARGET(kHost))}) + .BindInput("MultiLevelScores", {LiteType::GetTensorTy(TARGET(kHost))}) + .BindInput("RoisNum", {LiteType::GetTensorTy(TARGET(kHost))}) + .BindOutput("FpnRois", {LiteType::GetTensorTy(TARGET(kHost))}) + .BindOutput("MultiLevelRoIsNum", {LiteType::GetTensorTy(TARGET(kHost))}) + .BindPaddleOpVersion("collect_fpn_proposals", 1) + .Finalize(); diff --git a/lite/kernels/arm/collect_fpn_proposals_compute.h b/lite/kernels/host/collect_fpn_proposals_compute.h similarity index 86% rename from lite/kernels/arm/collect_fpn_proposals_compute.h rename to lite/kernels/host/collect_fpn_proposals_compute.h index f1e7448a07a..f00b312dc20 100644 --- a/lite/kernels/arm/collect_fpn_proposals_compute.h +++ b/lite/kernels/host/collect_fpn_proposals_compute.h @@ -13,17 +13,16 @@ // limitations under the License. #pragma once -#include #include "lite/core/kernel.h" -#include "lite/operators/axpy_op.h" +#include "lite/core/op_registry.h" namespace paddle { namespace lite { namespace kernels { -namespace arm { +namespace host { class CollectFpnProposalsCompute - : public KernelLite { + : public KernelLite { public: using param_t = operators::CollectFpnProposalsParam; @@ -32,7 +31,7 @@ class CollectFpnProposalsCompute virtual ~CollectFpnProposalsCompute() = default; }; -} // namespace arm +} // namespace host } // namespace kernels } // namespace lite } // namespace paddle diff --git a/lite/kernels/host/distribute_fpn_proposals_compute.cc b/lite/kernels/host/distribute_fpn_proposals_compute.cc new file mode 100644 index 00000000000..bc8e4dfe9db --- /dev/null +++ b/lite/kernels/host/distribute_fpn_proposals_compute.cc @@ -0,0 +1,183 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "lite/kernels/host/distribute_fpn_proposals_compute.h" +#include +#include +#include +#include + +namespace paddle { +namespace lite { +namespace kernels { +namespace host { + +const int kBoxDim = 4; + +template +static inline T BBoxArea(const T* box, bool normalized) { + if (box[2] < box[0] || box[3] < box[1]) { + // If coordinate values are is invalid + // (e.g. xmax < xmin or ymax < ymin), return 0. + return static_cast(0.); + } else { + const T w = box[2] - box[0]; + const T h = box[3] - box[1]; + if (normalized) { + return w * h; + } else { + // If coordinate values are not within range [0, 1]. + return (w + 1) * (h + 1); + } + } +} + +inline std::vector GetLodFromRoisNum(const Tensor* rois_num) { + std::vector rois_lod; + auto* rois_num_data = rois_num->data(); + + rois_lod.push_back(static_cast(0)); + for (int i = 0; i < rois_num->numel(); ++i) { + rois_lod.push_back(rois_lod.back() + + static_cast(rois_num_data[i])); + } + return rois_lod; +} + +void DistributeFpnProposalsCompute::Run() { + auto& param = Param(); + const lite::Tensor* fpn_rois = param.fpn_rois; + std::vector multi_fpn_rois = param.multi_fpn_rois; + lite::Tensor* restore_index = param.restore_index; + int min_level = param.min_level; + int max_level = param.max_level; + int refer_level = param.refer_level; + int refer_scale = param.refer_scale; + int num_level = max_level - min_level + 1; + + std::vector fpn_rois_lod; + int fpn_rois_num; + if (param.rois_num) { + fpn_rois_lod = GetLodFromRoisNum(param.rois_num); + } else { + fpn_rois_lod = fpn_rois->lod().back(); + } + fpn_rois_num = fpn_rois_lod[fpn_rois_lod.size() - 1]; + + std::vector target_level; + // record the number of rois in each level + std::vector num_rois_level(num_level, 0); + std::vector num_rois_level_integral(num_level + 1, 0); + for (size_t i = 0; i < fpn_rois_lod.size() - 1; ++i) { + auto fpn_rois_slice = + fpn_rois->Slice(static_cast(fpn_rois_lod[i]), + static_cast(fpn_rois_lod[i + 1])); + const float* rois_data = fpn_rois_slice.data(); + for (int j = 0; j < fpn_rois_slice.dims()[0]; ++j) { + // get the target level of current rois + float roi_scale = std::sqrt(BBoxArea(rois_data, false)); + int tgt_lvl = + std::floor(log2(roi_scale / refer_scale + static_cast(1e-6)) + + refer_level); + tgt_lvl = std::min(max_level, std::max(tgt_lvl, min_level)); + target_level.push_back(tgt_lvl); + num_rois_level[tgt_lvl - min_level]++; + rois_data += kBoxDim; + } + } + // define the output rois + // pointer which point to each level fpn rois + std::vector multi_fpn_rois_data(num_level); + // lod0 which will record the offset information of each level rois + std::vector> multi_fpn_rois_lod0; + for (int i = 0; i < num_level; ++i) { + // allocate memory for each level rois + multi_fpn_rois[i]->Resize({num_rois_level[i], kBoxDim}); + multi_fpn_rois_data[i] = multi_fpn_rois[i]->mutable_data(); + std::vector lod0(1, 0); + multi_fpn_rois_lod0.push_back(lod0); + // statistic start point for each level rois + num_rois_level_integral[i + 1] = + num_rois_level_integral[i] + num_rois_level[i]; + } + restore_index->Resize({fpn_rois_num, 1}); + int* restore_index_data = restore_index->mutable_data(); + std::vector restore_index_inter(fpn_rois_num, -1); + // distribute the rois into different fpn level by target level + for (size_t i = 0; i < fpn_rois_lod.size() - 1; ++i) { + Tensor fpn_rois_slice = + fpn_rois->Slice(static_cast(fpn_rois_lod[i]), + static_cast(fpn_rois_lod[i + 1])); + const float* rois_data = fpn_rois_slice.data(); + size_t cur_offset = fpn_rois_lod[i]; + for (int j = 0; j < num_level; j++) { + multi_fpn_rois_lod0[j].push_back(multi_fpn_rois_lod0[j][i]); + } + for (int j = 0; j < fpn_rois_slice.dims()[0]; ++j) { + int lvl = target_level[cur_offset + j]; + memcpy(multi_fpn_rois_data[lvl - min_level], + rois_data, + kBoxDim * sizeof(float)); + multi_fpn_rois_data[lvl - min_level] += kBoxDim; + int index_in_shuffle = num_rois_level_integral[lvl - min_level] + + multi_fpn_rois_lod0[lvl - min_level][i + 1]; + restore_index_inter[index_in_shuffle] = cur_offset + j; + multi_fpn_rois_lod0[lvl - min_level][i + 1]++; + rois_data += kBoxDim; + } + } + for (int i = 0; i < fpn_rois_num; ++i) { + restore_index_data[restore_index_inter[i]] = i; + } + if (param.multi_rois_num.size() > 0) { + int batch_size = fpn_rois_lod.size() - 1; + for (int i = 0; i < num_level; ++i) { + param.multi_rois_num[i]->Resize({batch_size}); + int* rois_num_data = param.multi_rois_num[i]->mutable_data(); + for (int j = 0; j < batch_size; ++j) { + rois_num_data[j] = static_cast(multi_fpn_rois_lod0[i][j + 1] - + multi_fpn_rois_lod0[i][j]); + } + } + } + // merge lod information into LoDTensor + for (int i = 0; i < num_level; ++i) { + lite::LoD lod; + lod.emplace_back(multi_fpn_rois_lod0[i]); + multi_fpn_rois[i]->set_lod(lod); + } + return; +} + +} // namespace host +} // namespace kernels +} // namespace lite +} // namespace paddle + +REGISTER_LITE_KERNEL(distribute_fpn_proposals, + kHost, + kFloat, + kNCHW, + paddle::lite::kernels::host::DistributeFpnProposalsCompute, + def) + .BindInput("FpnRois", {LiteType::GetTensorTy(TARGET(kHost))}) + .BindInput("RoisNum", + {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt32))}) + .BindOutput("MultiFpnRois", {LiteType::GetTensorTy(TARGET(kHost))}) + .BindOutput("MultiLevelRoIsNum", + {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt32))}) + .BindOutput("RestoreIndex", + {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt32))}) + .BindPaddleOpVersion("distribute_fpn_proposals", 1) + .Finalize(); diff --git a/lite/kernels/arm/distribute_fpn_proposals_compute.h b/lite/kernels/host/distribute_fpn_proposals_compute.h similarity index 85% rename from lite/kernels/arm/distribute_fpn_proposals_compute.h rename to lite/kernels/host/distribute_fpn_proposals_compute.h index e150b338de0..88e99674ee2 100644 --- a/lite/kernels/arm/distribute_fpn_proposals_compute.h +++ b/lite/kernels/host/distribute_fpn_proposals_compute.h @@ -13,17 +13,16 @@ // limitations under the License. #pragma once -#include #include "lite/core/kernel.h" -#include "lite/operators/distribute_fpn_proposals_op.h" +#include "lite/core/op_registry.h" namespace paddle { namespace lite { namespace kernels { -namespace arm { +namespace host { class DistributeFpnProposalsCompute - : public KernelLite { + : public KernelLite { public: using param_t = operators::DistributeFpnProposalsParam; @@ -32,7 +31,7 @@ class DistributeFpnProposalsCompute virtual ~DistributeFpnProposalsCompute() = default; }; -} // namespace arm +} // namespace host } // namespace kernels } // namespace lite } // namespace paddle