Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions lite/kernels/arm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ add_kernel(generate_proposals_v2_compute_arm ARM extra SRCS generate_proposals_v
add_kernel(roi_align_compute_arm ARM extra SRCS roi_align_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(box_clip_compute_arm ARM extra SRCS box_clip_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(assign_value_compute_arm ARM basic SRCS assign_value_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(collect_fpn_proposals_compute_arm ARM extra SRCS collect_fpn_proposals_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(distribute_fpn_proposals_compute_arm ARM extra SRCS distribute_fpn_proposals_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(collect_fpn_proposals_compute_arm ARM extra SRCS collect_fpn_proposals_compute.cc DEPS ${lite_kernel_deps} collect_fpn_proposals_compute_host)
add_kernel(distribute_fpn_proposals_compute_arm ARM extra SRCS distribute_fpn_proposals_compute.cc DEPS ${lite_kernel_deps} distribute_fpn_proposals_compute_host)
add_kernel(clip_compute_arm ARM extra SRCS clip_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(pixel_shuffle_compute_arm ARM extra SRCS pixel_shuffle_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(scatter_compute_arm ARM extra SRCS scatter_compute.cc DEPS ${lite_kernel_deps} math_arm)
Expand Down
159 changes: 2 additions & 157 deletions lite/kernels/arm/collect_fpn_proposals_compute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,168 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/arm/collect_fpn_proposals_compute.h"
#include <numeric>
#include <string>
#include <vector>
#include "lite/backends/arm/math/funcs.h"
#include "lite/core/op_registry.h"
#include "lite/core/tensor.h"
#include "lite/core/type_system.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace arm {

// Associates a proposal's score with everything needed to locate the
// proposal again after sorting: the image it belongs to (batch_id), its
// row within its FPN level's RoI tensor (index), and that level (level).
struct ScoreWithID {
  float score = 0.0f;  // previously left uninitialized by the default ctor
  int batch_id = -1;
  int index = -1;
  int level = -1;
  ScoreWithID() = default;
  ScoreWithID(float score_, int batch_id_, int index_, int level_)
      : score(score_), batch_id(batch_id_), index(index_), level(level_) {}
};

static inline bool CompareByScore(ScoreWithID a, ScoreWithID b) {
return a.score >= b.score;
}

static inline bool CompareByBatchid(ScoreWithID a, ScoreWithID b) {
return a.batch_id < b.batch_id;
}

// Gathers the RoIs proposed at every FPN level, keeps the post_nms_topN
// highest-scoring ones across all levels, groups the survivors by batch
// image, and writes them into the single output tensor fpn_rois together
// with a LoD describing the per-image partition.
void CollectFpnProposalsCompute::Run() {
  auto& param = Param<operators::CollectFpnProposalsParam>();
  auto multi_layer_rois = param.multi_level_rois;
  auto multi_layer_scores = param.multi_level_scores;
  auto* fpn_rois = param.fpn_rois;
  int post_nms_topN = param.post_nms_topN;

  if (multi_layer_rois.size() != multi_layer_scores.size()) {
    // Fixed typo in the original message ("equan" -> "equal").
    LOG(FATAL) << "multi_layer_rois.size() should be equal to "
                  "multi_layer_scores.size()";
  }

  // integral_of_all_rois[i] holds the number of RoIs in levels [0, i);
  // the last entry is the grand total across all levels.
  size_t num_fpn_level = multi_layer_rois.size();
  std::vector<int> integral_of_all_rois(num_fpn_level + 1, 0);
  int num_size = param.multi_rois_num.size();
  for (size_t i = 0; i < num_fpn_level; ++i) {
    int all_rois = 0;
    if (num_size == 0) {
      // No RoisNum tensors: derive the level's count from its LoD.
      auto cur_rois_lod = multi_layer_rois[i]->lod().back();
      all_rois = cur_rois_lod[cur_rois_lod.size() - 1];
    } else {
      // RoisNum given: the level's total is the sum over batch images.
      const int* cur_rois_num = param.multi_rois_num[i]->data<int>();
      all_rois = std::accumulate(
          cur_rois_num, cur_rois_num + param.multi_rois_num[i]->numel(), 0);
    }
    integral_of_all_rois[i + 1] = integral_of_all_rois[i] + all_rois;
  }
  const int batch_size = (num_size == 0)
                             ? multi_layer_rois[0]->lod().back().size() - 1
                             : param.multi_rois_num[0]->numel();

  // Flatten every (score, batch_id, row, level) tuple into one array.
  std::vector<ScoreWithID> scores_of_all_rois(
      integral_of_all_rois[num_fpn_level], ScoreWithID());
  for (int i = 0; i < num_fpn_level; ++i) {
    const float* cur_level_scores = multi_layer_scores[i]->data<float>();
    int cur_level_num = integral_of_all_rois[i + 1] - integral_of_all_rois[i];
    int cur_batch_id = 0;
    int pre_num = 0;
    // Hoisted out of the j-loop: the original copied the LoD vector on
    // every iteration (accidentally quadratic) and additionally read
    // lod().back() unconditionally before the loop, even when the LoD may
    // be unset because RoisNum tensors are provided instead.
    std::vector<uint64_t> cur_scores_lod;
    if (num_size == 0) {
      cur_scores_lod = multi_layer_scores[i]->lod().back();
    }
    for (int j = 0; j < cur_level_num; ++j) {
      // Advance cur_batch_id when j crosses the current image's boundary.
      if (num_size == 0) {
        if (static_cast<size_t>(j) >= cur_scores_lod[cur_batch_id + 1]) {
          cur_batch_id++;
        }
      } else {
        const int* rois_num_data = param.multi_rois_num[i]->data<int>();
        if (j >= pre_num + rois_num_data[cur_batch_id]) {
          pre_num += rois_num_data[cur_batch_id];
          cur_batch_id++;
        }
      }
      int cur_index = j + integral_of_all_rois[i];
      scores_of_all_rois[cur_index].score = cur_level_scores[j];
      scores_of_all_rois[cur_index].index = j;
      scores_of_all_rois[cur_index].level = i;
      scores_of_all_rois[cur_index].batch_id = cur_batch_id;
    }
  }

  // keep top post_nms_topN rois, sort the rois by the score
  if (post_nms_topN > integral_of_all_rois[num_fpn_level]) {
    post_nms_topN = integral_of_all_rois[num_fpn_level];
  }
  std::stable_sort(
      scores_of_all_rois.begin(), scores_of_all_rois.end(), CompareByScore);
  scores_of_all_rois.resize(post_nms_topN);
  // sort by batch id so each image's survivors become contiguous
  std::stable_sort(
      scores_of_all_rois.begin(), scores_of_all_rois.end(), CompareByBatchid);
  // create a pointer array into each level's RoI data
  std::vector<const float*> multi_fpn_rois_data(num_fpn_level);
  for (int i = 0; i < num_fpn_level; ++i) {
    multi_fpn_rois_data[i] = multi_layer_rois[i]->data<float>();
  }

  // initialize the outputs: copy each surviving box (kBoxDim floats) and
  // record a LoD offset each time the batch id changes.
  const int kBoxDim = 4;
  auto fpn_rois_data = fpn_rois->mutable_data<float>();
  std::vector<uint64_t> lod0(1, 0);
  int cur_batch_id = 0;
  std::vector<int64_t> num_per_batch;
  int pre_idx = 0;
  int cur_num = 0;
  for (int i = 0; i < post_nms_topN; ++i) {
    int cur_fpn_level = scores_of_all_rois[i].level;
    int cur_level_index = scores_of_all_rois[i].index;
    std::memcpy(fpn_rois_data,
                multi_fpn_rois_data[cur_fpn_level] + cur_level_index * kBoxDim,
                kBoxDim * sizeof(float));
    fpn_rois_data += kBoxDim;
    if (scores_of_all_rois[i].batch_id != cur_batch_id) {
      cur_batch_id = scores_of_all_rois[i].batch_id;
      lod0.emplace_back(i);
      cur_num = i - pre_idx;
      pre_idx = i;
      num_per_batch.emplace_back(cur_num);
    }
  }
  num_per_batch.emplace_back(post_nms_topN - pre_idx);
  if (param.rois_num) {
    // NOTE(review): if some image keeps zero surviving RoIs,
    // num_per_batch holds fewer than batch_size entries and this read
    // runs past the end — presumably every image retains at least one
    // RoI; verify against callers.
    int* rois_num_data = param.rois_num->mutable_data<int>();
    for (int i = 0; i < batch_size; i++) {
      rois_num_data[i] = num_per_batch[i];
    }
  }
  lod0.emplace_back(post_nms_topN);
  lite::LoD lod;
  lod.emplace_back(lod0);
  fpn_rois->set_lod(lod);
  return;
}

} // namespace arm
} // namespace kernels
} // namespace lite
} // namespace paddle
#include "lite/kernels/host/collect_fpn_proposals_compute.h"

REGISTER_LITE_KERNEL(collect_fpn_proposals,
kARM,
kFloat,
kNCHW,
paddle::lite::kernels::arm::CollectFpnProposalsCompute,
paddle::lite::kernels::host::CollectFpnProposalsCompute,
def)
.BindInput("MultiLevelRois", {LiteType::GetTensorTy(TARGET(kARM))})
.BindInput("MultiLevelScores", {LiteType::GetTensorTy(TARGET(kARM))})
Expand Down
158 changes: 2 additions & 156 deletions lite/kernels/arm/distribute_fpn_proposals_compute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,167 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/arm/distribute_fpn_proposals_compute.h"
#include <string>
#include <vector>
#include "lite/backends/arm/math/funcs.h"
#include "lite/core/op_registry.h"
#include "lite/core/tensor.h"
#include "lite/core/type_system.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace arm {

const int kBoxDim = 4;

// Computes the area of an axis-aligned box given as
// [xmin, ymin, xmax, ymax]. Degenerate boxes (xmax < xmin or ymax < ymin)
// yield area 0. When the coordinates are not normalized to [0, 1], each
// side length is counted inclusively in pixels, i.e. (max - min + 1).
template <typename T>
static inline T BBoxArea(const T* box, bool normalized) {
  const T xmin = box[0];
  const T ymin = box[1];
  const T xmax = box[2];
  const T ymax = box[3];
  if (xmax < xmin || ymax < ymin) {
    // Invalid coordinates: treat as an empty box.
    return static_cast<T>(0.);
  }
  const T width = xmax - xmin;
  const T height = ymax - ymin;
  return normalized ? width * height : (width + 1) * (height + 1);
}

// Builds a LoD offset vector from per-image RoI counts:
// result[0] = 0 and result[i + 1] = result[i] + rois_num[i], so
// consecutive entries bracket each image's RoI rows.
inline std::vector<uint64_t> GetLodFromRoisNum(const Tensor* rois_num) {
  const auto* counts = rois_num->data<int>();
  std::vector<uint64_t> offsets;
  offsets.push_back(static_cast<uint64_t>(0));
  for (int i = 0; i < rois_num->numel(); ++i) {
    offsets.push_back(offsets.back() + static_cast<uint64_t>(counts[i]));
  }
  return offsets;
}

// Distributes the input FPN RoIs into one output tensor per pyramid level
// based on each RoI's scale (sqrt of its area): RoIs whose scale is near
// refer_scale map to refer_level, and the target level is clamped to
// [min_level, max_level]. Also emits restore_index, which records, for
// each original RoI row, its position in the concatenated per-level
// outputs.
void DistributeFpnProposalsCompute::Run() {
  auto& param = Param<operators::DistributeFpnProposalsParam>();
  const lite::Tensor* fpn_rois = param.fpn_rois;
  std::vector<lite::Tensor*> multi_fpn_rois = param.multi_fpn_rois;
  lite::Tensor* restore_index = param.restore_index;
  int min_level = param.min_level;
  int max_level = param.max_level;
  int refer_level = param.refer_level;
  int refer_scale = param.refer_scale;
  int num_level = max_level - min_level + 1;

  // Per-image offsets into fpn_rois: taken from the RoisNum tensor when
  // it is provided, otherwise from the input tensor's LoD.
  std::vector<uint64_t> fpn_rois_lod;
  int fpn_rois_num;
  if (param.rois_num) {
    fpn_rois_lod = GetLodFromRoisNum(param.rois_num);
  } else {
    fpn_rois_lod = fpn_rois->lod().back();
  }
  fpn_rois_num = fpn_rois_lod[fpn_rois_lod.size() - 1];

  std::vector<int> target_level;
  // record the number of rois in each level
  std::vector<int> num_rois_level(num_level, 0);
  std::vector<int> num_rois_level_integral(num_level + 1, 0);
  // First pass: compute every RoI's target level and count RoIs per level
  // so output tensors can be sized before copying.
  for (size_t i = 0; i < fpn_rois_lod.size() - 1; ++i) {
    auto fpn_rois_slice =
        fpn_rois->Slice<float>(static_cast<int64_t>(fpn_rois_lod[i]),
                               static_cast<int64_t>(fpn_rois_lod[i + 1]));
    const float* rois_data = fpn_rois_slice.data<float>();
    for (int j = 0; j < fpn_rois_slice.dims()[0]; ++j) {
      // get the target level of current rois
      // (the 1e-6 term guards log2 against a zero-area box)
      float roi_scale = std::sqrt(BBoxArea(rois_data, false));
      int tgt_lvl =
          std::floor(log2(roi_scale / refer_scale + static_cast<float>(1e-6)) +
                     refer_level);
      tgt_lvl = std::min(max_level, std::max(tgt_lvl, min_level));
      target_level.push_back(tgt_lvl);
      num_rois_level[tgt_lvl - min_level]++;
      rois_data += kBoxDim;
    }
  }
  // define the output rois
  // pointer which point to each level fpn rois
  std::vector<float*> multi_fpn_rois_data(num_level);
  // lod0 which will record the offset information of each level rois
  std::vector<std::vector<uint64_t>> multi_fpn_rois_lod0;
  for (int i = 0; i < num_level; ++i) {
    // allocate memory for each level rois
    multi_fpn_rois[i]->Resize({num_rois_level[i], kBoxDim});
    multi_fpn_rois_data[i] = multi_fpn_rois[i]->mutable_data<float>();
    std::vector<uint64_t> lod0(1, 0);
    multi_fpn_rois_lod0.push_back(lod0);
    // statistic start point for each level rois
    num_rois_level_integral[i + 1] =
        num_rois_level_integral[i] + num_rois_level[i];
  }
  restore_index->Resize({fpn_rois_num, 1});
  int* restore_index_data = restore_index->mutable_data<int>();
  // restore_index_inter[shuffled_pos] = original row; inverted below into
  // restore_index_data.
  std::vector<int> restore_index_inter(fpn_rois_num, -1);
  // distribute the rois into different fpn level by target level
  for (size_t i = 0; i < fpn_rois_lod.size() - 1; ++i) {
    Tensor fpn_rois_slice =
        fpn_rois->Slice<float>(static_cast<int64_t>(fpn_rois_lod[i]),
                               static_cast<int64_t>(fpn_rois_lod[i + 1]));
    const float* rois_data = fpn_rois_slice.data<float>();
    size_t cur_offset = fpn_rois_lod[i];
    // std::vector<size_t > lod_offset[num_level];
    // Seed every level's LoD entry for this image with the running count;
    // the entries are incremented below as boxes are appended.
    for (int j = 0; j < num_level; j++) {
      multi_fpn_rois_lod0[j].push_back(multi_fpn_rois_lod0[j][i]);
    }
    for (int j = 0; j < fpn_rois_slice.dims()[0]; ++j) {
      int lvl = target_level[cur_offset + j];
      // Append this kBoxDim-float box to its target level's output buffer.
      memcpy(multi_fpn_rois_data[lvl - min_level],
             rois_data,
             kBoxDim * sizeof(float));
      multi_fpn_rois_data[lvl - min_level] += kBoxDim;
      // Position of this box in the concatenated per-level outputs: start
      // of its level plus how many boxes that level has taken so far.
      int index_in_shuffle = num_rois_level_integral[lvl - min_level] +
                             multi_fpn_rois_lod0[lvl - min_level][i + 1];
      restore_index_inter[index_in_shuffle] = cur_offset + j;
      multi_fpn_rois_lod0[lvl - min_level][i + 1]++;
      rois_data += kBoxDim;
    }
  }
  // Invert the mapping: for each shuffled position i, store i at the
  // original row it came from.
  for (int i = 0; i < fpn_rois_num; ++i) {
    restore_index_data[restore_index_inter[i]] = i;
  }
  // Optional per-level RoisNum outputs: number of boxes each image
  // contributed to each level, derived from consecutive LoD offsets.
  if (param.multi_rois_num.size() > 0) {
    int batch_size = fpn_rois_lod.size() - 1;
    for (int i = 0; i < num_level; ++i) {
      param.multi_rois_num[i]->Resize({batch_size});
      int* rois_num_data = param.multi_rois_num[i]->mutable_data<int>();
      for (int j = 0; j < batch_size; ++j) {
        rois_num_data[j] = static_cast<int>(multi_fpn_rois_lod0[i][j + 1] -
                                            multi_fpn_rois_lod0[i][j]);
      }
    }
  }
  // merge lod information into LoDTensor
  for (int i = 0; i < num_level; ++i) {
    lite::LoD lod;
    lod.emplace_back(multi_fpn_rois_lod0[i]);
    multi_fpn_rois[i]->set_lod(lod);
  }
  return;
}

} // namespace arm
} // namespace kernels
} // namespace lite
} // namespace paddle
#include "lite/kernels/host/distribute_fpn_proposals_compute.h"

REGISTER_LITE_KERNEL(distribute_fpn_proposals,
kARM,
kFloat,
kNCHW,
paddle::lite::kernels::arm::DistributeFpnProposalsCompute,
paddle::lite::kernels::host::DistributeFpnProposalsCompute,
def)
.BindInput("FpnRois", {LiteType::GetTensorTy(TARGET(kARM))})
.BindInput("RoisNum",
Expand Down
4 changes: 3 additions & 1 deletion lite/kernels/host/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,9 @@ add_kernel(linspace_compute_host Host extra SRCS linspace_compute.cc DEPS ${lite
add_kernel(beam_search_decode_compute_host Host extra SRCS beam_search_decode_compute.cc DEPS ${lite_kernel_deps})
add_kernel(roi_perspective_transform_compute_host Host extra SRCS roi_perspective_transform_compute.cc DEPS ${lite_kernel_deps})
add_kernel(lod_reset_compute_host Host extra SRCS lod_reset_compute.cc DEPS ${lite_kernel_deps})
add_kernel(argsort Host extra SRCS argsort_compute.cc DEPS ${lite_kernel_deps})
add_kernel(argsort_compute_host Host extra SRCS argsort_compute.cc DEPS ${lite_kernel_deps})
add_kernel(distribute_fpn_proposals_compute_host Host extra SRCS distribute_fpn_proposals_compute.cc DEPS ${lite_kernel_deps})
add_kernel(collect_fpn_proposals_compute_host Host extra SRCS collect_fpn_proposals_compute.cc DEPS ${lite_kernel_deps})

if(LITE_BUILD_EXTRA AND LITE_WITH_x86)
lite_cc_test(test_where_index_compute_host SRCS where_index_compute.cc DEPS where_index_compute_host)
Expand Down
Loading