diff --git a/paddle/fluid/operators/detection/CMakeLists.txt b/paddle/fluid/operators/detection/CMakeLists.txt index 2d7729b722ddb8..d5f2c6d7448d8f 100644 --- a/paddle/fluid/operators/detection/CMakeLists.txt +++ b/paddle/fluid/operators/detection/CMakeLists.txt @@ -28,18 +28,11 @@ function(detection_library TARGET_NAME) PARENT_SCOPE) endfunction() -detection_library(density_prior_box_op SRCS density_prior_box_op.cc - density_prior_box_op.cu) - detection_library(bipartite_match_op SRCS bipartite_match_op.cc) detection_library(anchor_generator_op SRCS anchor_generator_op.cc anchor_generator_op.cu) -detection_library(generate_proposal_labels_op SRCS - generate_proposal_labels_op.cc) detection_library(multiclass_nms_op SRCS multiclass_nms_op.cc DEPS phi common) detection_library(box_clip_op SRCS box_clip_op.cc box_clip_op.cu) -detection_library(box_decoder_and_assign_op SRCS box_decoder_and_assign_op.cc - box_decoder_and_assign_op.cu) if(WITH_GPU OR WITH_ROCM) if(WITH_GPU) @@ -62,8 +55,3 @@ endif() #Export local libraries to parent # set(DETECTION_LIBRARY ${LOCAL_DETECTION_LIBS} PARENT_SCOPE) - -cc_library(mask_util SRCS mask_util.cc) - -detection_library(generate_mask_labels_op SRCS generate_mask_labels_op.cc DEPS - mask_util) diff --git a/paddle/fluid/operators/detection/box_decoder_and_assign_op.cc b/paddle/fluid/operators/detection/box_decoder_and_assign_op.cc deleted file mode 100644 index a7b9ad490b56cf..00000000000000 --- a/paddle/fluid/operators/detection/box_decoder_and_assign_op.cc +++ /dev/null @@ -1,235 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/detection/box_decoder_and_assign_op.h" - -namespace paddle { -namespace operators { - -class BoxDecoderAndAssignOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - void InferShape(framework::InferShapeContext *ctx) const override { - PADDLE_ENFORCE_EQ( - ctx->HasInput("PriorBox"), - true, - phi::errors::NotFound("Input(PriorBox) of BoxDecoderAndAssignOp " - "is not found.")); - PADDLE_ENFORCE_EQ( - ctx->HasInput("PriorBoxVar"), - true, - phi::errors::NotFound("Input(PriorBoxVar) of BoxDecoderAndAssignOp" - " is not found.")); - PADDLE_ENFORCE_EQ( - ctx->HasInput("TargetBox"), - true, - phi::errors::NotFound("Input(TargetBox) of BoxDecoderAndAssignOp " - "is not found.")); - PADDLE_ENFORCE_EQ( - ctx->HasInput("BoxScore"), - true, - phi::errors::NotFound("Input(BoxScore) of BoxDecoderAndAssignOp " - "is not found.")); - PADDLE_ENFORCE_EQ( - ctx->HasOutput("DecodeBox"), - true, - phi::errors::NotFound("Output(DecodeBox) of BoxDecoderAndAssignOp" - " is not found.")); - PADDLE_ENFORCE_EQ( - ctx->HasOutput("OutputAssignBox"), - true, - phi::errors::NotFound("Output(OutputAssignBox) of " - "BoxDecoderAndAssignOp is not found.")); - - auto prior_box_dims = ctx->GetInputDim("PriorBox"); - auto prior_box_var_dims = ctx->GetInputDim("PriorBoxVar"); - auto target_box_dims = ctx->GetInputDim("TargetBox"); - auto box_score_dims = ctx->GetInputDim("BoxScore"); - - PADDLE_ENFORCE_EQ( - prior_box_dims.size(), - 2, - phi::errors::InvalidArgument("The rank of Input of PriorBox must" - " be 2. But received rank = %d", - prior_box_dims.size())); - PADDLE_ENFORCE_EQ( - prior_box_dims[1], - 4, - phi::errors::InvalidArgument( - "The shape of PriorBox is [N, 4], " - "and the second dimension must be 4. But received dimension = %d", - prior_box_dims[1])); - PADDLE_ENFORCE_EQ( - prior_box_var_dims.size(), - 1, - phi::errors::InvalidArgument("The rank of Input of PriorBoxVar " - "must be 1. But received rank = %d", - prior_box_var_dims.size())); - PADDLE_ENFORCE_EQ( - prior_box_var_dims[0], - 4, - phi::errors::InvalidArgument("The shape of PriorBoxVar is [4]. " - "But received dimension = %d", - prior_box_var_dims[0])); - PADDLE_ENFORCE_EQ( - target_box_dims.size(), - 2, - phi::errors::InvalidArgument("The rank of Input of TargetBox must " - "be 2. But received rank = %d", - target_box_dims.size())); - PADDLE_ENFORCE_EQ( - box_score_dims.size(), - 2, - phi::errors::InvalidArgument("The rank of Input of BoxScore must " - "be 2. But received rank = %d", - box_score_dims.size())); - if (ctx->IsRuntime()) { - PADDLE_ENFORCE_EQ( - prior_box_dims[0], - target_box_dims[0], - phi::errors::InvalidArgument( - "The first dimension of prior_box and " - "target_box is the number of box and should be same. But " - "received dimension of prior_box is %d, dimension of target_box " - "is %d", - prior_box_dims[0], - target_box_dims[0])); - PADDLE_ENFORCE_EQ( - prior_box_dims[0], - box_score_dims[0], - phi::errors::InvalidArgument( - "The first dimension of prior_box and " - "box_score is the number of box and should be same. But received " - "dimension of prior_box is %d, dimension of box_score is %d", - prior_box_dims[0], - box_score_dims[0])); - PADDLE_ENFORCE_EQ( - target_box_dims[1], - box_score_dims[1] * prior_box_dims[1], - phi::errors::InvalidArgument( - "The shape of target_box is " - "[N, classnum * 4], The shape of box_score is [N, classnum], " - "The shape of prior_box is [N, 4]. But received second dimension " - "of " - "target_box is %d, second dimension of box_score_dims is %d, " - "and second dimension of prior_box_dims is %d", - target_box_dims[1], - box_score_dims[1], - prior_box_dims[1])); - } - ctx->SetOutputDim( - "DecodeBox", - common::make_ddim({target_box_dims[0], target_box_dims[1]})); - ctx->ShareLoD("TargetBox", /*->*/ "DecodeBox"); - ctx->SetOutputDim( - "OutputAssignBox", - common::make_ddim({prior_box_dims[0], prior_box_dims[1]})); - ctx->ShareLoD("PriorBox", /*->*/ "OutputAssignBox"); - } -}; - -class BoxDecoderAndAssignOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput( - "PriorBox", - "(Tensor, default Tensor) " - "Box list PriorBox is a 2-D Tensor with shape [N, 4] which holds N " - "boxes and each box is represented as [xmin, ymin, xmax, ymax], " - "[xmin, ymin] is the left top coordinate of the anchor box, " - "if the input is image feature map, they are close to the origin " - "of the coordinate system. [xmax, ymax] is the right bottom " - "coordinate of the anchor box."); - AddInput("PriorBoxVar", - "(Tensor, default Tensor, optional) " - "PriorBoxVar is a 2-D Tensor with shape [N, 4] which holds N " - "group of variance. PriorBoxVar will set all elements to 1 by " - "default.") - .AsDispensable(); - AddInput("TargetBox", - "(phi::DenseTensor or Tensor) " - "This input can be a 2-D phi::DenseTensor with shape " - "[N, classnum*4]. It holds N targets for N boxes."); - AddInput("BoxScore", - "(phi::DenseTensor or Tensor) " - "This input can be a 2-D phi::DenseTensor with shape " - "[N, classnum], each box is represented as [classnum] which is " - "the classification probabilities."); - AddAttr("box_clip", - "(float, default 4.135, np.log(1000. / 16.)) " - "clip box to prevent overflowing") - .SetDefault(4.135f); - AddOutput("DecodeBox", - "(phi::DenseTensor or Tensor) " - "the output tensor of op with shape [N, classnum * 4] " - "representing the result of N target boxes decoded with " - "M Prior boxes and variances for each class."); - AddOutput("OutputAssignBox", - "(phi::DenseTensor or Tensor) " - "the output tensor of op with shape [N, 4] " - "representing the result of N target boxes decoded with " - "M Prior boxes and variances with the best non-background class " - "by BoxScore."); - AddComment(R"DOC( - -Bounding Box Coder. - -Decode the target bounding box with the prior_box information. - -The Decoding schema is described below: - - $$ - ox = (pw \\times pxv \\times tx + px) - \\frac{tw}{2} - $$ - $$ - oy = (ph \\times pyv \\times ty + py) - \\frac{th}{2} - $$ - $$ - ow = \\exp (pwv \\times tw) \\times pw + \\frac{tw}{2} - $$ - $$ - oh = \\exp (phv \\times th) \\times ph + \\frac{th}{2} - $$ - -where `tx`, `ty`, `tw`, `th` denote the target box's center coordinates, width -and height respectively. Similarly, `px`, `py`, `pw`, `ph` denote the -prior_box's (anchor) center coordinates, width and height. `pxv`, `pyv`, `pwv`, -`phv` denote the variance of the prior_box and `ox`, `oy`, `ow`, `oh` denote the -decoded coordinates, width and height in decode_box. - -decode_box is obtained after box decode, then assigning schema is described below: - -For each prior_box, use the best non-background class's decoded values to -update the prior_box locations and get output_assign_box. So, the shape of -output_assign_box is the same as PriorBox. -)DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR( - box_decoder_and_assign, - ops::BoxDecoderAndAssignOp, - ops::BoxDecoderAndAssignOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); - -PD_REGISTER_STRUCT_KERNEL(box_decoder_and_assign, - CPU, - ALL_LAYOUT, - ops::BoxDecoderAndAssignKernel, - float, - double) {} diff --git a/paddle/fluid/operators/detection/box_decoder_and_assign_op.cu b/paddle/fluid/operators/detection/box_decoder_and_assign_op.cu deleted file mode 100644 index a956a58ac75f7c..00000000000000 --- a/paddle/fluid/operators/detection/box_decoder_and_assign_op.cu +++ /dev/null @@ -1,159 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/detection/box_decoder_and_assign_op.h" -#include "paddle/fluid/memory/memcpy.h" -#include "paddle/phi/backends/gpu/gpu_primitives.h" - -namespace paddle { -namespace operators { - -template -__global__ void DecodeBoxKernel(const T* prior_box_data, - const T* prior_box_var_data, - const T* target_box_data, - const int roi_num, - const int class_num, - const T box_clip, - T* output_box_data) { - const int idx = threadIdx.x + blockIdx.x * blockDim.x; - if (idx < roi_num * class_num) { - int i = idx / class_num; - int j = idx % class_num; - T prior_box_width = prior_box_data[i * 4 + 2] - prior_box_data[i * 4] + 1; - T prior_box_height = - prior_box_data[i * 4 + 3] - prior_box_data[i * 4 + 1] + 1; - T prior_box_center_x = prior_box_data[i * 4] + prior_box_width / 2; - T prior_box_center_y = prior_box_data[i * 4 + 1] + prior_box_height / 2; - - int offset = i * class_num * 4 + j * 4; - T dw = prior_box_var_data[2] * target_box_data[offset + 2]; - T dh = prior_box_var_data[3] * target_box_data[offset + 3]; - if (dw > box_clip) { - dw = box_clip; - } - if (dh > box_clip) { - dh = box_clip; - } - T target_box_center_x = 0, target_box_center_y = 0; - T target_box_width = 0, target_box_height = 0; - target_box_center_x = - prior_box_var_data[0] * target_box_data[offset] * prior_box_width + - prior_box_center_x; - target_box_center_y = - prior_box_var_data[1] * target_box_data[offset + 1] * prior_box_height + - prior_box_center_y; - target_box_width = expf(dw) * prior_box_width; - target_box_height = expf(dh) * prior_box_height; - - output_box_data[offset] = target_box_center_x - target_box_width / 2; - output_box_data[offset + 1] = target_box_center_y - target_box_height / 2; - output_box_data[offset + 2] = - target_box_center_x + target_box_width / 2 - 1; - output_box_data[offset + 3] = - target_box_center_y + target_box_height / 2 - 1; - } -} - -template -__global__ void AssignBoxKernel(const T* prior_box_data, - const T* box_score_data, - T* output_box_data, - const int roi_num, - const int class_num, - T* output_assign_box_data) { - const int idx = threadIdx.x + blockIdx.x * blockDim.x; - if (idx < roi_num) { - int i = idx; - T max_score = -1; - int max_j = -1; - for (int j = 0; j < class_num; ++j) { - T score = box_score_data[i * class_num + j]; - if (score > max_score && j > 0) { - max_score = score; - max_j = j; - } - } - if (max_j > 0) { - for (int pno = 0; pno < 4; pno++) { - output_assign_box_data[i * 4 + pno] = - output_box_data[i * class_num * 4 + max_j * 4 + pno]; - } - } else { - for (int pno = 0; pno < 4; pno++) { - output_assign_box_data[i * 4 + pno] = prior_box_data[i * 4 + pno]; - } - } - } -} - -template -class BoxDecoderAndAssignCUDAKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto* prior_box = context.Input("PriorBox"); - auto* prior_box_var = context.Input("PriorBoxVar"); - auto* target_box = context.Input("TargetBox"); - auto* box_score = context.Input("BoxScore"); - auto* output_box = context.Output("DecodeBox"); - auto* output_assign_box = - context.Output("OutputAssignBox"); - - auto roi_num = target_box->dims()[0]; - auto class_num = box_score->dims()[1]; - auto* target_box_data = target_box->data(); - auto* prior_box_data = prior_box->data(); - auto* prior_box_var_data = prior_box_var->data(); - auto* box_score_data = box_score->data(); - output_box->mutable_data({roi_num, class_num * 4}, context.GetPlace()); - output_assign_box->mutable_data({roi_num, 4}, context.GetPlace()); - T* output_box_data = output_box->data(); - T* output_assign_box_data = output_assign_box->data(); - - int block = 512; - int grid = (roi_num * class_num + block - 1) / block; - auto& device_ctx = context.cuda_device_context(); - - const T box_clip = static_cast(context.Attr("box_clip")); - - DecodeBoxKernel - <<>>(prior_box_data, - prior_box_var_data, - target_box_data, - roi_num, - class_num, - box_clip, - output_box_data); - - context.device_context().Wait(); - int assign_grid = (roi_num + block - 1) / block; - AssignBoxKernel<<>>( - prior_box_data, - box_score_data, - output_box_data, - roi_num, - class_num, - output_assign_box_data); - context.device_context().Wait(); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -PD_REGISTER_STRUCT_KERNEL(box_decoder_and_assign, - GPU, - ALL_LAYOUT, - ops::BoxDecoderAndAssignCUDAKernel, - float, - double) {} diff --git a/paddle/fluid/operators/detection/box_decoder_and_assign_op.h b/paddle/fluid/operators/detection/box_decoder_and_assign_op.h deleted file mode 100644 index 5a191ffaf44746..00000000000000 --- a/paddle/fluid/operators/detection/box_decoder_and_assign_op.h +++ /dev/null @@ -1,104 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include -#include -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -namespace paddle { -namespace operators { - -template -class BoxDecoderAndAssignKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto* prior_box = context.Input("PriorBox"); - auto* prior_box_var = context.Input("PriorBoxVar"); - auto* target_box = context.Input("TargetBox"); - auto* box_score = context.Input("BoxScore"); - auto* output_box = context.Output("DecodeBox"); - auto* output_assign_box = - context.Output("OutputAssignBox"); - int roi_num = target_box->dims()[0]; - int class_num = box_score->dims()[1]; - auto* target_box_data = target_box->data(); - auto* prior_box_data = prior_box->data(); - auto* prior_box_var_data = prior_box_var->data(); - auto* box_score_data = box_score->data(); - output_box->mutable_data({roi_num, class_num * 4}, context.GetPlace()); - output_assign_box->mutable_data({roi_num, 4}, context.GetPlace()); - T* output_box_data = output_box->data(); - T* output_assign_box_data = output_assign_box->data(); - const T bbox_clip = static_cast(context.Attr("box_clip")); - - for (int i = 0; i < roi_num; ++i) { - T prior_box_width = prior_box_data[i * 4 + 2] - prior_box_data[i * 4] + 1; - T prior_box_height = - prior_box_data[i * 4 + 3] - prior_box_data[i * 4 + 1] + 1; - T prior_box_center_x = prior_box_data[i * 4] + prior_box_width / 2; - T prior_box_center_y = prior_box_data[i * 4 + 1] + prior_box_height / 2; - for (int j = 0; j < class_num; ++j) { - int64_t offset = i * class_num * 4 + j * 4; - T dw = std::min(prior_box_var_data[2] * target_box_data[offset + 2], - bbox_clip); - T dh = std::min(prior_box_var_data[3] * target_box_data[offset + 3], - bbox_clip); - T target_box_center_x = 0, target_box_center_y = 0; - T target_box_width = 0, target_box_height = 0; - target_box_center_x = - prior_box_var_data[0] * target_box_data[offset] * prior_box_width + - prior_box_center_x; - target_box_center_y = prior_box_var_data[1] * - target_box_data[offset + 1] * - prior_box_height + - prior_box_center_y; - target_box_width = std::exp(dw) * prior_box_width; - target_box_height = std::exp(dh) * prior_box_height; - - output_box_data[offset] = target_box_center_x - target_box_width / 2; - output_box_data[offset + 1] = - target_box_center_y - target_box_height / 2; - output_box_data[offset + 2] = - target_box_center_x + target_box_width / 2 - 1; - output_box_data[offset + 3] = - target_box_center_y + target_box_height / 2 - 1; - } - - T max_score = -1; - int max_j = -1; - for (int j = 0; j < class_num; ++j) { - T score = box_score_data[i * class_num + j]; - if (score > max_score && j > 0) { - max_score = score; - max_j = j; - } - } - - if (max_j > 0) { - for (int pno = 0; pno < 4; pno++) { - output_assign_box_data[i * 4 + pno] = - output_box_data[i * class_num * 4 + max_j * 4 + pno]; - } - } else { - for (int pno = 0; pno < 4; pno++) { - output_assign_box_data[i * 4 + pno] = prior_box_data[i * 4 + pno]; - } - } - } - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/detection/density_prior_box_op.cc b/paddle/fluid/operators/detection/density_prior_box_op.cc deleted file mode 100644 index 4a533615aab158..00000000000000 --- a/paddle/fluid/operators/detection/density_prior_box_op.cc +++ /dev/null @@ -1,279 +0,0 @@ -/*Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/detection/density_prior_box_op.h" - -namespace paddle { -namespace operators { - -class DensityPriorBoxOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK( - ctx->HasInput("Input"), "Input", "Input", "DensityPriorBoxOp"); - OP_INOUT_CHECK( - ctx->HasInput("Image"), "Input", "Image", "DensityPriorBoxOp"); - - auto image_dims = ctx->GetInputDim("Image"); - auto input_dims = ctx->GetInputDim("Input"); - PADDLE_ENFORCE_EQ( - image_dims.size(), - 4, - phi::errors::InvalidArgument( - "The Input(Image) of Op(density_prior_box) should be a 4-D Tensor " - "and data format is NCHW. But received Image's dimensions = %d, " - "shape = [%s].", - image_dims.size(), - image_dims)); - PADDLE_ENFORCE_EQ( - input_dims.size(), - 4, - phi::errors::InvalidArgument( - "The Input(Input) of Op(density_prior_box) should be a 4-D Tensor " - "and data format is NCHW. But received Input's dimensions = %d, " - "shape = [%s].", - input_dims.size(), - input_dims)); - - if (ctx->IsRuntime()) { - PADDLE_ENFORCE_LT( - input_dims[2], - image_dims[2], - phi::errors::InvalidArgument( - "The input tensor Input's height" - "of DensityPriorBoxOp should be smaller than input tensor Image's" - "height. But received Input's height = %d, Image's height = %d", - input_dims[2], - image_dims[2])); - - PADDLE_ENFORCE_LT( - input_dims[3], - image_dims[3], - phi::errors::InvalidArgument( - "The input tensor Input's width" - "of DensityPriorBoxOp should be smaller than input tensor Image's" - "width. But received Input's width = %d, Image's width = %d", - input_dims[3], - image_dims[3])); - } - auto variances = ctx->Attrs().Get>("variances"); - - auto fixed_sizes = ctx->Attrs().Get>("fixed_sizes"); - auto fixed_ratios = ctx->Attrs().Get>("fixed_ratios"); - auto densities = ctx->Attrs().Get>("densities"); - bool flatten = ctx->Attrs().Get("flatten_to_2d"); - - PADDLE_ENFORCE_EQ( - fixed_sizes.size(), - densities.size(), - phi::errors::InvalidArgument( - "The length of fixed_sizes and densities must be equal. " - "But received: fixed_sizes's length is %d, densities's length " - "is %d", - fixed_sizes.size(), - densities.size())); - size_t num_priors = 0; - for (auto density : densities) { - num_priors += (fixed_ratios.size()) * (pow(density, 2)); // NOLINT - } - if (!flatten) { - std::vector dim_vec(4); - dim_vec[0] = input_dims[2]; - dim_vec[1] = input_dims[3]; - dim_vec[2] = static_cast(num_priors); - dim_vec[3] = 4; - ctx->SetOutputDim("Boxes", common::make_ddim(dim_vec)); - ctx->SetOutputDim("Variances", common::make_ddim(dim_vec)); - } else if (ctx->IsRuntime()) { - int64_t dim0 = - static_cast(input_dims[2] * input_dims[3] * num_priors); - ctx->SetOutputDim("Boxes", {dim0, 4}); - ctx->SetOutputDim("Variances", {dim0, 4}); - } else { - ctx->SetOutputDim("Boxes", {-1, 4}); - ctx->SetOutputDim("Variances", {-1, 4}); - } - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "Input"), - ctx.GetPlace()); - } -}; - -class DensityPriorBoxOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput( - "Input", - "(Tensor, default Tensor), " - "the input feature data of DensityPriorBoxOp, the layout is NCHW."); - AddInput("Image", - "(Tensor, default Tensor), " - "the input image data of DensityPriorBoxOp, the layout is NCHW."); - AddOutput("Boxes", - "(Tensor, default Tensor), the output prior boxes of " - "DensityPriorBoxOp. The layout is [H, W, num_priors, 4]. " - "H is the height of input, W is the width of input, num_priors " - "is the box count of each position."); - AddOutput("Variances", - "(Tensor, default Tensor), the expanded variances of " - "DensityPriorBoxOp. The layout is [H, W, num_priors, 4]. " - "H is the height of input, W is the width of input, num_priors " - "is the box count of each position."); - AddAttr>("variances", - "(vector) List of variances to be " - "encoded in density prior boxes.") - .AddCustomChecker([](const std::vector& variances) { - PADDLE_ENFORCE_EQ(variances.size(), - 4, - phi::errors::InvalidArgument( - "The length of variance must " - "be 4. But received: variances' length is %d.", - variances.size())); - for (size_t i = 0; i < variances.size(); ++i) { - PADDLE_ENFORCE_GT(variances[i], - 0.0, - phi::errors::OutOfRange( - "variance[%d] must be greater " - "than 0. But received: variance[%d] = %f", - i, - i, - variances[i])); - } - }); - AddAttr("clip", "(bool) Whether to clip out-of-boundary boxes.") - .SetDefault(true); - AddAttr("flatten_to_2d", - "(bool) Whether to flatten to 2D and " - "the second dim is 4.") - .SetDefault(false); - AddAttr( - "step_w", - "Density prior boxes step across width, 0.0 for auto calculation.") - .SetDefault(0.0) - .AddCustomChecker([](const float& step_w) { - PADDLE_ENFORCE_GE( - step_w, - 0.0, - phi::errors::InvalidArgument("step_w should be larger " - "than 0. But received: step_w = %f.", - step_w)); - }); - AddAttr( - "step_h", - "Density prior boxes step across height, 0.0 for auto calculation.") - .SetDefault(0.0) - .AddCustomChecker([](const float& step_h) { - PADDLE_ENFORCE_GE( - step_h, - 0.0, - phi::errors::InvalidArgument("step_h should be larger " - "than 0. But received: step_h = %f.", - step_h)); - }); - - AddAttr("offset", - "(float) " - "Density prior boxes center offset.") - .SetDefault(0.5); - AddAttr>("fixed_sizes", - "(vector) List of fixed sizes " - "of generated density prior boxes.") - .SetDefault(std::vector{}) - .AddCustomChecker([](const std::vector& fixed_sizes) { - for (size_t i = 0; i < fixed_sizes.size(); ++i) { - PADDLE_ENFORCE_GT( - fixed_sizes[i], - 0.0, - phi::errors::OutOfRange( - "fixed_sizes[%d] should be " - "larger than 0. But received: fixed_sizes[%d] = %f", - i, - i, - fixed_sizes[i])); - } - }); - - AddAttr>("fixed_ratios", - "(vector) List of fixed ratios " - "of generated density prior boxes.") - .SetDefault(std::vector{}) - .AddCustomChecker([](const std::vector& fixed_ratios) { - for (size_t i = 0; i < fixed_ratios.size(); ++i) { - PADDLE_ENFORCE_GT( - fixed_ratios[i], - 0.0, - phi::errors::OutOfRange( - "fixed_ratios[%d] should be " - "larger than 0. But received: fixed_ratios[%d] = %f", - i, - i, - fixed_ratios[i])); - } - }); - - AddAttr>("densities", - "(vector) List of densities " - "of generated density prior boxes.") - .SetDefault(std::vector{}) - .AddCustomChecker([](const std::vector& densities) { - for (size_t i = 0; i < densities.size(); ++i) { - PADDLE_ENFORCE_GT( - densities[i], - 0, - phi::errors::OutOfRange( - "densities[%d] should be " - "larger than 0. But received: densities[%d] = %f.", - i, - i, - densities[i])); - } - }); - AddComment(R"DOC( - Density Prior box operator - Each position of the input produce N density prior boxes, N is determined by - the count of fixed_ratios, densities, the calculation of N is as follows: - for density in densities: - N += size(fixed_ratios)*density^2 - )DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR( - density_prior_box, - ops::DensityPriorBoxOp, - ops::DensityPriorBoxOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); - -PD_REGISTER_STRUCT_KERNEL(density_prior_box, - CPU, - ALL_LAYOUT, - ops::DensityPriorBoxOpKernel, - float, - double) {} - -REGISTER_OP_KERNEL(prior_box, - MKLDNN, - ::paddle::platform::CPUPlace, - ops::PriorBoxOpKernel, - ops::PriorBoxOpKernel, - ops::PriorBoxOpKernel, - ops::PriorBoxOpKernel); diff --git a/paddle/fluid/operators/detection/density_prior_box_op.cu b/paddle/fluid/operators/detection/density_prior_box_op.cu deleted file mode 100644 index 016b2e0bc93529..00000000000000 --- a/paddle/fluid/operators/detection/density_prior_box_op.cu +++ /dev/null @@ -1,197 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/detection/density_prior_box_op.h" - -namespace paddle { -namespace operators { - -template -static __device__ inline T Clip(T in) { - return min(max(in, 0.), 1.); -} - -template -static __global__ void GenDensityPriorBox(const int height, - const int width, - const int im_height, - const int im_width, - const T offset, - const T step_width, - const T step_height, - const int num_priors, - const T* ratios_shift, - bool is_clip, - const T var_xmin, - const T var_ymin, - const T var_xmax, - const T var_ymax, - T* out, - T* var) { - int gidx = blockIdx.x * blockDim.x + threadIdx.x; - int gidy = blockIdx.y * blockDim.y + threadIdx.y; - int step_x = blockDim.x * gridDim.x; - int step_y = blockDim.y * gridDim.y; - - const T* width_ratio = ratios_shift; - const T* height_ratio = ratios_shift + num_priors; - const T* width_shift = ratios_shift + 2 * num_priors; - const T* height_shift = ratios_shift + 3 * num_priors; - - for (int j = gidy; j < height; j += step_y) { - for (int i = gidx; i < width * num_priors; i += step_x) { - int h = j; - int w = i / num_priors; - int k = i % num_priors; - - T center_x = (w + offset) * step_width; - T center_y = (h + offset) * step_height; - - T center_x_temp = center_x + width_shift[k]; - T center_y_temp = center_y + height_shift[k]; - - T box_width_ratio = width_ratio[k] / 2.; - T box_height_ratio = height_ratio[k] / 2.; - - T xmin = max((center_x_temp - box_width_ratio) / im_width, 0.); - T ymin = max((center_y_temp - box_height_ratio) / im_height, 0.); - T xmax = min((center_x_temp + box_width_ratio) / im_width, 1.); - T ymax = min((center_y_temp + box_height_ratio) / im_height, 1.); - - int out_offset = (j * width * num_priors + i) * 4; - out[out_offset] = is_clip ? Clip(xmin) : xmin; - out[out_offset + 1] = is_clip ? Clip(ymin) : ymin; - out[out_offset + 2] = is_clip ? Clip(xmax) : xmax; - out[out_offset + 3] = is_clip ? Clip(ymax) : ymax; - - var[out_offset] = var_xmin; - var[out_offset + 1] = var_ymin; - var[out_offset + 2] = var_xmax; - var[out_offset + 3] = var_ymax; - } - } -} - -template -class DensityPriorBoxOpCUDAKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("Input"); - auto* image = ctx.Input("Image"); - auto* boxes = ctx.Output("Boxes"); - auto* vars = ctx.Output("Variances"); - - auto variances = ctx.Attr>("variances"); - auto is_clip = ctx.Attr("clip"); - - auto fixed_sizes = ctx.Attr>("fixed_sizes"); - auto fixed_ratios = ctx.Attr>("fixed_ratios"); - auto densities = ctx.Attr>("densities"); - - T step_w = static_cast(ctx.Attr("step_w")); - T step_h = static_cast(ctx.Attr("step_h")); - T offset = static_cast(ctx.Attr("offset")); - - auto img_width = image->dims()[3]; - auto img_height = image->dims()[2]; - - auto feature_width = input->dims()[3]; - auto feature_height = input->dims()[2]; - - T step_width, step_height; - if (step_w == 0 || step_h == 0) { - step_width = static_cast(img_width) / feature_width; - step_height = static_cast(img_height) / feature_height; - } else { - step_width = step_w; - step_height = step_h; - } - - int num_priors = 0; - for (size_t i = 0; i < densities.size(); ++i) { - num_priors += (fixed_ratios.size()) * (pow(densities[i], 2)); - } - int step_average = static_cast((step_width + step_height) * 0.5); - - phi::DenseTensor h_temp; - T* tdata = h_temp.mutable_data({num_priors * 4}, platform::CPUPlace()); - int idx = 0; - for (size_t s = 0; s < fixed_sizes.size(); ++s) { - auto fixed_size = fixed_sizes[s]; - int density = densities[s]; - for (size_t r = 0; r < fixed_ratios.size(); ++r) { - float ar = fixed_ratios[r]; - int shift = step_average / density; - float box_width_ratio = fixed_size * sqrt(ar); - float box_height_ratio = fixed_size / sqrt(ar); - for (int di = 0; di < density; ++di) { - for (int dj = 0; dj < density; ++dj) { - float center_x_temp = shift / 2. + dj * shift - step_average / 2.; - float center_y_temp = shift / 2. + di * shift - step_average / 2.; - tdata[idx] = box_width_ratio; - tdata[num_priors + idx] = box_height_ratio; - tdata[2 * num_priors + idx] = center_x_temp; - tdata[3 * num_priors + idx] = center_y_temp; - idx++; - } - } - } - } - - boxes->mutable_data(ctx.GetPlace()); - vars->mutable_data(ctx.GetPlace()); - - phi::DenseTensor d_temp; - framework::TensorCopy(h_temp, ctx.GetPlace(), &d_temp); - - // At least use 32 threads, at most 512 threads. - // blockx is multiple of 32. - int blockx = std::min( - static_cast(((feature_width * num_priors + 31) >> 5) << 5), - static_cast(512L)); - int gridx = (feature_width * num_priors + blockx - 1) / blockx; - dim3 threads(blockx, 1); - dim3 grids(gridx, feature_height); - - auto stream = ctx.template device_context().stream(); - GenDensityPriorBox<<>>(feature_height, - feature_width, - img_height, - img_width, - offset, - step_width, - step_height, - num_priors, - d_temp.data(), - is_clip, - variances[0], - variances[1], - variances[2], - variances[3], - boxes->data(), - vars->data()); - } -}; // namespace operators - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -PD_REGISTER_STRUCT_KERNEL(density_prior_box, - GPU, - ALL_LAYOUT, - ops::DensityPriorBoxOpCUDAKernel, - float, - double) {} diff --git a/paddle/fluid/operators/detection/density_prior_box_op.h b/paddle/fluid/operators/detection/density_prior_box_op.h deleted file mode 100644 index 995abf11200130..00000000000000 --- a/paddle/fluid/operators/detection/density_prior_box_op.h +++ /dev/null @@ -1,155 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include -#include - -#include "paddle/fluid/operators/detection/prior_box_op.h" - -namespace paddle { -namespace operators { - -template -class DensityPriorBoxOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("Input"); - auto* image = ctx.Input("Image"); - auto* boxes = ctx.Output("Boxes"); - auto* vars = ctx.Output("Variances"); - - auto variances = ctx.Attr>("variances"); - auto clip = ctx.Attr("clip"); - - auto fixed_sizes = ctx.Attr>("fixed_sizes"); - auto fixed_ratios = ctx.Attr>("fixed_ratios"); - auto densities = ctx.Attr>("densities"); - - T step_w = static_cast(ctx.Attr("step_w")); - T step_h = static_cast(ctx.Attr("step_h")); - T offset = static_cast(ctx.Attr("offset")); - - auto img_width = image->dims()[3]; - auto img_height = image->dims()[2]; - - auto feature_width = input->dims()[3]; - auto feature_height = input->dims()[2]; - - T step_width, step_height; - if (step_w == 0 || step_h == 0) { - step_width = static_cast(img_width) / feature_width; - step_height = static_cast(img_height) / feature_height; - } else { - step_width = step_w; - step_height = step_h; - } - int num_priors = 0; - -#ifdef PADDLE_WITH_MKLML -#pragma omp parallel for reduction(+ : num_priors) -#endif - for (size_t i = 0; i < densities.size(); ++i) { - num_priors += (fixed_ratios.size()) * (pow(densities[i], 2)); - } - - boxes->mutable_data(ctx.GetPlace()); - vars->mutable_data(ctx.GetPlace()); - - auto box_dim = vars->dims(); - boxes->Resize({feature_height, feature_width, num_priors, 4}); - auto e_boxes = phi::EigenTensor::From(*boxes).setConstant(0.0); - int step_average = static_cast((step_width + step_height) * 0.5); - - std::vector sqrt_fixed_ratios; -#ifdef PADDLE_WITH_MKLML -#pragma omp parallel for -#endif - for (size_t i = 0; i < fixed_ratios.size(); i++) { - sqrt_fixed_ratios.push_back(sqrt(fixed_ratios[i])); - } - -#ifdef PADDLE_WITH_MKLML -#pragma omp parallel for collapse(2) -#endif - for (int h = 0; h < feature_height; ++h) { - for (int w = 0; w < feature_width; ++w) { - T center_x = (w + offset) * step_width; - T center_y = (h + offset) * step_height; - int idx = 0; - // Generate density prior boxes with fixed sizes. - for (size_t s = 0; s < fixed_sizes.size(); ++s) { - auto fixed_size = fixed_sizes[s]; - int density = densities[s]; - int shift = step_average / density; - // Generate density prior boxes with fixed ratios. - for (size_t r = 0; r < fixed_ratios.size(); ++r) { - float box_width_ratio = fixed_size * sqrt_fixed_ratios[r]; - float box_height_ratio = fixed_size / sqrt_fixed_ratios[r]; - float density_center_x = center_x - step_average / 2. + shift / 2.; - float density_center_y = center_y - step_average / 2. + shift / 2.; - for (int di = 0; di < density; ++di) { - for (int dj = 0; dj < density; ++dj) { - float center_x_temp = density_center_x + dj * shift; - float center_y_temp = density_center_y + di * shift; - e_boxes(h, w, idx, 0) = std::max( - (center_x_temp - box_width_ratio / 2.) / img_width, 0.); - e_boxes(h, w, idx, 1) = std::max( - (center_y_temp - box_height_ratio / 2.) / img_height, 0.); - e_boxes(h, w, idx, 2) = std::min( - (center_x_temp + box_width_ratio / 2.) / img_width, 1.); - e_boxes(h, w, idx, 3) = std::min( - (center_y_temp + box_height_ratio / 2.) / img_height, 1.); - idx++; - } - } - } - } - } - } - if (clip) { - T* dt = boxes->data(); - std::transform(dt, dt + boxes->numel(), dt, [](T v) -> T { - return std::min(std::max(v, 0.), 1.); - }); - } - phi::DenseTensor var_t; - var_t.mutable_data( - common::make_ddim({1, static_cast(variances.size())}), - ctx.GetPlace()); - - auto var_et = phi::EigenTensor::From(var_t); - - for (size_t i = 0; i < variances.size(); ++i) { - var_et(0, i) = variances[i]; - } - - int box_num = feature_height * feature_width * num_priors; - auto var_dim = vars->dims(); - vars->Resize({box_num, static_cast(variances.size())}); - - auto e_vars = phi::EigenMatrix::From(*vars); -#ifdef PADDLE_WITH_MKLML -#pragma omp parallel for collapse(2) -#endif - for (int i = 0; i < box_num; ++i) { - for (size_t j = 0; j < variances.size(); ++j) { - e_vars(i, j) = variances[j]; - } - } - - vars->Resize(var_dim); - boxes->Resize(box_dim); - } -}; // namespace operators - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/detection/generate_mask_labels_op.cc b/paddle/fluid/operators/detection/generate_mask_labels_op.cc deleted file mode 100644 index 5ee843d72387bb..00000000000000 --- a/paddle/fluid/operators/detection/generate_mask_labels_op.cc +++ /dev/null @@ -1,547 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include - -#include -#include -#include - -#include "paddle/fluid/framework/lod_tensor.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/detection/bbox_util.h" -#include "paddle/fluid/operators/detection/mask_util.h" -#include "paddle/fluid/operators/math/concat_and_split.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -namespace paddle { -namespace operators { - -const int kBoxDim = 4; - -template -void AppendMask(phi::DenseTensor* out, - int64_t offset, - phi::DenseTensor* to_add) { - auto* out_data = out->data(); - auto* to_add_data = to_add->data(); - memcpy(out_data + offset, to_add_data, to_add->numel() * sizeof(T)); -} - -class GenerateMaskLabelsOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ( - ctx->HasInput("ImInfo"), - true, - phi::errors::InvalidArgument("Input(ImInfo) shouldn't be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasInput("GtClasses"), - true, - phi::errors::InvalidArgument("Input(GtClasses) shouldn't be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasInput("IsCrowd"), - true, - phi::errors::InvalidArgument("Input(IsCrowd) shouldn't be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasInput("GtSegms"), - true, - phi::errors::InvalidArgument("Input(GtSegms) shouldn't be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasInput("Rois"), - true, - phi::errors::InvalidArgument("Input(Rois) shouldn't be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasInput("LabelsInt32"), - true, - phi::errors::InvalidArgument("Input(LabelsInt32) shouldn't be null.")); - - PADDLE_ENFORCE_EQ( - ctx->HasOutput("MaskRois"), - true, - phi::errors::InvalidArgument( - "Output(MaskRois) of GenerateMaskLabelsOp should not be null")); - PADDLE_ENFORCE_EQ(ctx->HasOutput("RoiHasMaskInt32"), - true, - phi::errors::InvalidArgument( - "Output(RoiHasMaskInt32) of GenerateMaskLabelsOp " - "should not be null")); - PADDLE_ENFORCE_EQ( - ctx->HasOutput("MaskInt32"), - true, - phi::errors::InvalidArgument( - "Output(MaskInt32) of GenerateMaskLabelsOp should not be null")); - - auto im_info_dims = ctx->GetInputDim("ImInfo"); - auto gt_segms_dims = ctx->GetInputDim("GtSegms"); - PADDLE_ENFORCE_EQ( - im_info_dims.size(), - 2, - phi::errors::InvalidArgument("The rank of Input(ImInfo) must be 2.")); - PADDLE_ENFORCE_EQ( - gt_segms_dims.size(), - 2, - phi::errors::InvalidArgument("The rank of Input(GtSegms) must be 2.")); - PADDLE_ENFORCE_EQ(gt_segms_dims[1], - 2, - phi::errors::InvalidArgument( - "The second dim of Input(GtSegms) must be 2.")); - int num_classes = ctx->Attrs().Get("num_classes"); - int resolution = ctx->Attrs().Get("resolution"); - - ctx->SetOutputDim("MaskRois", {-1, 4}); - ctx->SetOutputDim("RoiHasMaskInt32", {-1, 1}); - ctx->SetOutputDim("MaskInt32", {-1, num_classes * resolution * resolution}); - if (!ctx->IsRuntime()) { - ctx->SetLoDLevel("MaskRois", ctx->GetLoDLevel("Rois")); - ctx->SetLoDLevel("RoiHasMaskInt32", ctx->GetLoDLevel("Rois")); - ctx->SetLoDLevel("MaskInt32", ctx->GetLoDLevel("Rois")); - } - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Rois"); - return phi::KernelKey(data_type, platform::CPUPlace()); - } -}; - -/* - * Expand masks from shape (#masks, M ** 2) to (#masks, #classes * M ** 2) - * to encode class specific mask targets. - */ -template -static inline void ExpandMaskTarget(const phi::CPUContext& ctx, - const phi::DenseTensor& masks, - const phi::DenseTensor& mask_class_labels, - const int resolution, - const int num_classes, - phi::DenseTensor* mask_targets) { - const uint8_t* masks_data = masks.data(); - int64_t num_mask = masks.dims()[0]; - const int* mask_class_labels_data = mask_class_labels.data(); - const int M = resolution * resolution; - const int mask_dim = M * num_classes; - - int* mask_targets_data = - mask_targets->mutable_data({num_mask, mask_dim}, ctx.GetPlace()); - phi::funcs::set_constant(ctx, mask_targets, static_cast(-1)); - for (int64_t mask_id = 0; mask_id < num_mask; ++mask_id) { - int cls = mask_class_labels_data[mask_id]; - int start = M * cls; - if (cls > 0) { - for (int i = 0; i < M; ++i) { - mask_targets_data[mask_id * mask_dim + start + i] = - static_cast(masks_data[mask_id * M + i]); - } - } - } -} - -template -std::vector SampleMaskForOneImage( - const phi::CPUContext& ctx, - const phi::DenseTensor& im_info, - const phi::DenseTensor& gt_classes, - const phi::DenseTensor& is_crowd, - const phi::DenseTensor& gt_segms, - const phi::DenseTensor& rois, - const phi::DenseTensor& label_int32, - const int num_classes, - const int resolution, - const framework::LoD& segm_length) { - // Prepare the mask targets by associating one gt mask to each training roi - // that has a fg (non-bg) class label. - const int64_t gt_size = static_cast(gt_classes.dims()[0]); - const int64_t roi_size = static_cast(rois.dims()[0]); - const int* gt_classes_data = gt_classes.data(); - const int* is_crowd_data = is_crowd.data(); - const int* label_int32_data = label_int32.data(); - PADDLE_ENFORCE_EQ(roi_size, - label_int32.dims()[0], - phi::errors::InvalidArgument( - "The first dim of label [%d] is the different from " - "roi_size [%d], they should be same.", - label_int32.dims()[0], - roi_size)); - - std::vector mask_gt_inds, fg_inds; - std::vector>> gt_polys; - - auto polys_num = segm_length[1]; - auto segm_lod_offset = framework::ConvertToOffsetBasedLoD(segm_length); - auto lod1 = segm_lod_offset[1]; - auto lod2 = segm_lod_offset[2]; - const T* polys_data = gt_segms.data(); - for (int64_t i = 0; i < gt_size; ++i) { - if ((gt_classes_data[i] > 0) && (is_crowd_data[i] == 0)) { - mask_gt_inds.emplace_back(i); - - // slice fg segmentation polys - int poly_num = static_cast(polys_num[i]); - std::vector> polys; - int s_idx = static_cast(lod1[i]); - for (int j = 0; j < poly_num; ++j) { - int s = static_cast(lod2[s_idx + j]); - int e = static_cast(lod2[s_idx + j + 1]); - PADDLE_ENFORCE_NE(s, - e, - phi::errors::InvalidArgument( - "The start point and the end point in the poly " - "segment [%d] should not be same, but received " - "the start point [%d] and the end point [%d].", - i, - s, - e)); - std::vector plts(polys_data + s * 2, polys_data + e * 2); - polys.push_back(plts); - } - gt_polys.push_back(polys); - } - } - for (int64_t i = 0; i < roi_size; ++i) { - if (label_int32_data[i] > 0) { - fg_inds.emplace_back(i); - } - } - int gt_num = static_cast(mask_gt_inds.size()); - int fg_num = static_cast(fg_inds.size()); - - phi::DenseTensor boxes_from_polys; - boxes_from_polys.mutable_data({gt_num, 4}, platform::CPUPlace()); - Poly2Boxes(gt_polys, boxes_from_polys.data()); - - std::vector roi_has_mask = - std::vector(fg_inds.begin(), fg_inds.end()); - phi::DenseTensor mask_class_labels; - phi::DenseTensor masks; - phi::DenseTensor rois_fg; - - auto im_scale = im_info.data()[2]; - if (fg_num > 0) { - // Class labels for the foreground rois - mask_class_labels.mutable_data({fg_num, 1}, ctx.GetPlace()); - Gather(label_int32_data, - 1, - fg_inds.data(), - static_cast(fg_inds.size()), - mask_class_labels.data()); - - uint8_t* masks_data = masks.mutable_data( - {fg_num, resolution * resolution}, ctx.GetPlace()); - - // Find overlap between all foreground rois and the bounding boxes - // enclosing each segmentation - T* rois_fg_data = rois_fg.mutable_data({fg_num, 4}, ctx.GetPlace()); - Gather( - rois.data(), 4, fg_inds.data(), fg_inds.size(), rois_fg.data()); - - for (int k = 0; k < rois_fg.numel(); ++k) { - rois_fg_data[k] = rois_fg_data[k] / im_scale; - } - - phi::DenseTensor overlaps_bbfg_bbpolys; - overlaps_bbfg_bbpolys.mutable_data({fg_num, gt_num}, ctx.GetPlace()); - BboxOverlaps(rois_fg, boxes_from_polys, &overlaps_bbfg_bbpolys); - - // Map from each fg rois to the index of the mask with highest overlap - // (measured by bbox overlap) - T* overlaps_bbfg_bbpolys_data = overlaps_bbfg_bbpolys.data(); - std::vector fg_masks_inds; - for (int64_t i = 0; i < fg_num; ++i) { - const T* v = overlaps_bbfg_bbpolys_data + i * gt_num; - T max_overlap = std::numeric_limits::min(); - int id = 0; - for (int64_t j = 0; j < gt_num; ++j) { - if (v[j] > max_overlap) { - max_overlap = v[j]; - id = static_cast(j); - } - } - fg_masks_inds.push_back(id); - } - - // add fg targets - for (int64_t i = 0; i < fg_num; ++i) { - int fg_polys_ind = fg_masks_inds[i]; - T* roi_fg = rois_fg_data + i * 4; - uint8_t* mask = masks_data + i * resolution * resolution; - Polys2MaskWrtBox(gt_polys[fg_polys_ind], roi_fg, resolution, mask); - } - } else { - // The network cannot handle empty blobs, so we must provide a mask - // We simply take the first bg roi, given it an all -1's mask (ignore - // label), and label it with class zero (bg). - int bg_num = 1; - T* rois_fg_data = rois_fg.mutable_data({bg_num, 4}, ctx.GetPlace()); - const T* rois_data = rois.data(); - std::vector bg_inds; - for (int64_t i = 0; i < roi_size; ++i) { - if (label_int32_data[i] == 0) { - bg_inds.emplace_back(i); - rois_fg_data[0] = rois_data[0] / im_scale; - rois_fg_data[1] = rois_data[1] / im_scale; - rois_fg_data[2] = rois_data[2] / im_scale; - rois_fg_data[3] = rois_data[3] / im_scale; - break; - } - } - masks.mutable_data({bg_num, resolution * resolution}, - ctx.GetPlace()); - phi::funcs::set_constant(ctx, &masks, static_cast(-1)); - int* mask_class_labels_data = - mask_class_labels.mutable_data({bg_num, 1}, ctx.GetPlace()); - mask_class_labels_data[0] = 0; - roi_has_mask = std::vector(bg_inds.begin(), bg_inds.end()); - } - - phi::DenseTensor masks_expand; - ExpandMaskTarget( - ctx, masks, mask_class_labels, resolution, num_classes, &masks_expand); - - T* rois_fg_data = rois_fg.data(); - for (int k = 0; k < rois_fg.numel(); ++k) { - rois_fg_data[k] = rois_fg_data[k] * im_scale; - } - - phi::DenseTensor roi_has_mask_t; - int roi_has_mask_size = static_cast(roi_has_mask.size()); - int* roi_has_mask_data = - roi_has_mask_t.mutable_data({roi_has_mask_size, 1}, ctx.GetPlace()); - std::copy(roi_has_mask.begin(), roi_has_mask.end(), roi_has_mask_data); - - std::vector res; - res.emplace_back(rois_fg); - res.emplace_back(roi_has_mask_t); - res.emplace_back(masks_expand); - return res; -} - -template -class GenerateMaskLabelsKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* im_info = ctx.Input("ImInfo"); - auto* gt_classes = ctx.Input("GtClasses"); - auto* is_crowd = ctx.Input("IsCrowd"); - auto* gt_segms = ctx.Input("GtSegms"); - auto* rois = ctx.Input("Rois"); - auto* label_int32 = ctx.Input("LabelsInt32"); - - auto* mask_rois = ctx.Output("MaskRois"); - auto* roi_has_mask_int32 = ctx.Output("RoiHasMaskInt32"); - auto* mask_int32 = ctx.Output("MaskInt32"); - - int num_classes = ctx.Attr("num_classes"); - int resolution = ctx.Attr("resolution"); - - PADDLE_ENFORCE_EQ( - gt_classes->lod().size(), - 1UL, - phi::errors::InvalidArgument( - "GenerateMaskLabelsOp gt_classes needs 1 level of LoD")); - PADDLE_ENFORCE_EQ( - is_crowd->lod().size(), - 1UL, - phi::errors::InvalidArgument( - "GenerateMaskLabelsOp is_crowd needs 1 level of LoD")); - PADDLE_ENFORCE_EQ(rois->lod().size(), - 1UL, - phi::errors::InvalidArgument( - "GenerateMaskLabelsOp rois needs 1 level of LoD")); - PADDLE_ENFORCE_EQ( - label_int32->lod().size(), - 1UL, - phi::errors::InvalidArgument( - "GenerateMaskLabelsOp label_int32 needs 1 level of LoD")); - - PADDLE_ENFORCE_EQ( - gt_segms->lod().size(), - 3UL, - phi::errors::InvalidArgument( - "GenerateMaskLabelsOp gt_segms needs 3 level of LoD")); - - int64_t n = static_cast(gt_classes->lod().back().size() - 1); - PADDLE_ENFORCE_EQ( - gt_segms->lod()[0].size() - 1, - n, - phi::errors::InvalidArgument( - "Batchsize of Input(gt_segms) and Input(gt_classes) should be " - "same, but received gt_segms[%d], gt_classes[%d].", - gt_segms->lod()[0].size() - 1, - n)); - - int mask_dim = num_classes * resolution * resolution; - int roi_num = static_cast(rois->lod().back()[n]); - mask_rois->mutable_data({roi_num, kBoxDim}, ctx.GetPlace()); - roi_has_mask_int32->mutable_data({roi_num, 1}, ctx.GetPlace()); - mask_int32->mutable_data({roi_num, mask_dim}, ctx.GetPlace()); - - framework::LoD lod; - std::vector lod0(1, 0); - - int64_t num_mask = 0; - auto& dev_ctx = ctx.device_context(); - - auto gt_classes_lod = gt_classes->lod().back(); - auto is_crowd_lod = is_crowd->lod().back(); - auto rois_lod = rois->lod().back(); - auto label_int32_lod = label_int32->lod().back(); - auto gt_segms_lod = gt_segms->lod(); - - for (int i = 0; i < n; ++i) { - if (rois_lod[i] == rois_lod[i + 1]) { - lod0.emplace_back(num_mask); - continue; - } - phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1); - phi::DenseTensor gt_classes_slice = - gt_classes->Slice(static_cast(gt_classes_lod[i]), - static_cast(gt_classes_lod[i + 1])); - phi::DenseTensor is_crowd_slice = - is_crowd->Slice(static_cast(is_crowd_lod[i]), - static_cast(is_crowd_lod[i + 1])); - phi::DenseTensor label_int32_slice = - label_int32->Slice(static_cast(label_int32_lod[i]), - static_cast(label_int32_lod[i + 1])); - phi::DenseTensor rois_slice = - rois->Slice(static_cast(rois_lod[i]), - static_cast(rois_lod[i + 1])); - - auto sub_lod_and_offset = - framework::GetSubLoDAndAbsoluteOffset(gt_segms_lod, i, i + 1, 0); - auto lod_length = sub_lod_and_offset.first; - size_t s = sub_lod_and_offset.second.first; - size_t e = sub_lod_and_offset.second.second; - phi::DenseTensor gt_segms_slice = - gt_segms->Slice(static_cast(s), static_cast(e)); - - std::vector tensor_output = - SampleMaskForOneImage(dev_ctx, - im_info_slice, - gt_classes_slice, - is_crowd_slice, - gt_segms_slice, - rois_slice, - label_int32_slice, - num_classes, - resolution, - lod_length); - - phi::DenseTensor sampled_mask_rois = tensor_output[0]; - phi::DenseTensor sampled_roi_has_mask_int32 = tensor_output[1]; - phi::DenseTensor sampled_mask_int32 = tensor_output[2]; - - AppendMask(mask_rois, kBoxDim * num_mask, &sampled_mask_rois); - AppendMask( - roi_has_mask_int32, num_mask, &sampled_roi_has_mask_int32); - AppendMask(mask_int32, mask_dim * num_mask, &sampled_mask_int32); - - num_mask += sampled_mask_rois.dims()[0]; - lod0.emplace_back(num_mask); - } - - lod.emplace_back(lod0); - mask_rois->set_lod(lod); - roi_has_mask_int32->set_lod(lod); - mask_int32->set_lod(lod); - mask_rois->Resize({num_mask, kBoxDim}); - roi_has_mask_int32->Resize({num_mask, 1}); - mask_int32->Resize({num_mask, mask_dim}); - } -}; - -class GenerateMaskLabelsOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("ImInfo", - "(Tensor), This input is a 2D Tensor with shape [B, 3]. " - "B is the number of input images, " - "each element consists of im_height, im_width, im_scale."); - AddInput("GtClasses", - "(phi::DenseTensor), This input is a 2D phi::DenseTensor with " - "shape [M, 1]. " - "M is the number of groundtruth, " - "each element is a class label of groundtruth."); - AddInput( - "IsCrowd", - "(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape " - "[M, 1]. " - "M is the number of groundtruth, " - "each element is a flag indicates whether a groundtruth is crowd."); - AddInput( - "GtSegms", - "(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape " - "[S, 2], it's LoD " - "level is 3. The LoD[0] represents the gt objects number of each " - "instance. LoD[1] represents the segmentation counts of each objects. " - "LoD[2] represents the polygons number of each segmentation. S the " - "total number of polygons coordinate points. Each element is (x, y) " - "coordinate points."); - AddInput( - "Rois", - "(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape " - "[R, 4]. " - "R is the number of rois which is the output of " - "generate_proposal_labels, " - "each element is a bounding box with (xmin, ymin, xmax, ymax) format."); - AddInput("LabelsInt32", - "(phi::DenseTensor), This intput is a 2D phi::DenseTensor with " - "shape [R, 1], " - "each element represents a class label of a roi"); - AddOutput( - "MaskRois", - "(phi::DenseTensor), This output is a 2D phi::DenseTensor with shape " - "[P, 4]. " - "P is the number of mask, " - "each element is a bounding box with [xmin, ymin, xmax, ymax] format."); - AddOutput("RoiHasMaskInt32", - "(phi::DenseTensor), This output is a 2D phi::DenseTensor with " - "shape [P, 1], " - "each element represents the output mask rois index with regard " - "to input rois"); - AddOutput("MaskInt32", - "(phi::DenseTensor), This output is a 4D phi::DenseTensor with " - "shape [P, Q], " - "Q equal to num_classes * resolution * resolution"); - - AddAttr("num_classes", "Class number."); - AddAttr("resolution", "Resolution of mask."); - - AddComment(R"DOC( -This operator can be, for given the RoIs and corresponding labels, -to sample foreground RoIs. This mask branch also has -a :math: `K \\times M^{2}` dimensional output targets for each foreground -RoI, which encodes K binary masks of resolution M x M, one for each of the -K classes. This mask targets are used to compute loss of mask branch. - )DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR( - generate_mask_labels, - ops::GenerateMaskLabelsOp, - ops::GenerateMaskLabelsOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); - -PD_REGISTER_STRUCT_KERNEL(generate_mask_labels, - CPU, - ALL_LAYOUT, - ops::GenerateMaskLabelsKernel, - float) {} diff --git a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc b/paddle/fluid/operators/detection/generate_proposal_labels_op.cc deleted file mode 100644 index ad37aa2ae682f7..00000000000000 --- a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc +++ /dev/null @@ -1,837 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include - -#include -#include -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/operators/detection/bbox_util.h" -#include "paddle/fluid/operators/math/concat_and_split.h" -#include "paddle/phi/kernels/funcs/gather.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -namespace paddle { -namespace operators { - -const int kBoxDim = 4; - -template -void AppendRois(phi::DenseTensor* out, - int64_t offset, - phi::DenseTensor* to_add) { - auto* out_data = out->data(); - auto* to_add_data = to_add->data(); - memcpy(out_data + offset, to_add_data, to_add->numel() * sizeof(T)); -} - -// Filter the ground-truth in RoIs and the RoIs with non-positive area. -// The ground-truth has max overlap with itself so the max_overlap is 1 -// and the corresponding RoI will be removed. -template -void FilterRoIs(const platform::DeviceContext& ctx, - const phi::DenseTensor& rpn_rois, - const phi::DenseTensor& max_overlap, - phi::DenseTensor* keep) { - const T* rpn_rois_dt = rpn_rois.data(); - const T* max_overlap_dt = max_overlap.data(); - int rois_num = static_cast(max_overlap.numel()); - keep->Resize({rois_num}); - int* keep_data = keep->mutable_data(ctx.GetPlace()); - int keep_len = 0; - for (int i = 0; i < rois_num; ++i) { - if ((rpn_rois_dt[i * 4 + 2] - rpn_rois_dt[i * 4 + 0] + 1) > 0 && - (rpn_rois_dt[i * 4 + 3] - rpn_rois_dt[i * 4 + 1] + 1) > 0 && - max_overlap_dt[i] < 1.) { - keep_data[keep_len++] = i; - } - } - keep->Resize({keep_len}); -} - -class GenerateProposalLabelsOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ( - ctx->HasInput("RpnRois"), - true, - phi::errors::NotFound("Input(RpnRois) shouldn't be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasInput("GtClasses"), - true, - phi::errors::NotFound("Input(GtClasses) shouldn't be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasInput("IsCrowd"), - true, - phi::errors::NotFound("Input(IsCrowd) shouldn't be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasInput("GtBoxes"), - true, - phi::errors::NotFound("Input(GtBoxes) shouldn't be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasInput("ImInfo"), - true, - phi::errors::NotFound("Input(ImInfo) shouldn't be null.")); - - PADDLE_ENFORCE_EQ( - ctx->HasOutput("Rois"), - true, - phi::errors::NotFound( - "Output(Rois) of GenerateProposalLabelsOp should not be null")); - PADDLE_ENFORCE_EQ(ctx->HasOutput("LabelsInt32"), - true, - phi::errors::NotFound("Output(LabelsInt32) of " - "GenerateProposalLabelsOp " - "should not be null")); - PADDLE_ENFORCE_EQ(ctx->HasOutput("BboxTargets"), - true, - phi::errors::NotFound("Output(BboxTargets) of " - "GenerateProposalLabelsOp " - "should not be null")); - PADDLE_ENFORCE_EQ( - ctx->HasOutput("BboxInsideWeights"), - true, - phi::errors::NotFound( - "Output(BboxInsideWeights) of GenerateProposalLabelsOp " - "should not be null")); - PADDLE_ENFORCE_EQ( - ctx->HasOutput("BboxOutsideWeights"), - true, - phi::errors::NotFound( - "Output(BboxOutsideWeights) of GenerateProposalLabelsOp " - "should not be null")); - - auto rpn_rois_dims = ctx->GetInputDim("RpnRois"); - auto gt_boxes_dims = ctx->GetInputDim("GtBoxes"); - auto im_info_dims = ctx->GetInputDim("ImInfo"); - - PADDLE_ENFORCE_EQ(rpn_rois_dims.size(), - 2, - phi::errors::InvalidArgument( - "The dimensions size of Input(RpnRois) must be 2. " - "But received dimensions size=[%d], dimensions=[%s].", - rpn_rois_dims.size(), - rpn_rois_dims)); - PADDLE_ENFORCE_EQ(gt_boxes_dims.size(), - 2, - phi::errors::InvalidArgument( - "The dimensions size of Input(GtBoxes) must be 2. " - "But received dimensions size=[%d], dimensions=[%s].", - gt_boxes_dims.size(), - gt_boxes_dims)); - PADDLE_ENFORCE_EQ(im_info_dims.size(), - 2, - phi::errors::InvalidArgument( - "The dimensions size of Input(ImInfo) must be 2. But " - "received dimensions size=[%d], dimensions=[%s].", - im_info_dims.size(), - im_info_dims)); - - int class_nums = ctx->Attrs().Get("class_nums"); - bool is_cascade_rcnn = ctx->Attrs().Get("is_cascade_rcnn"); - if (is_cascade_rcnn) { - PADDLE_ENFORCE_EQ( - ctx->HasInput("MaxOverlap"), - true, - phi::errors::NotFound( - "Input(MaxOverlap) of GenerateProposalLabelsOp " - "should not be null when is_cascade_rcnn is True.")); - } - - ctx->SetOutputDim("Rois", {-1, 4}); - ctx->SetOutputDim("LabelsInt32", {-1, 1}); - ctx->SetOutputDim("BboxTargets", {-1, 4 * class_nums}); - ctx->SetOutputDim("BboxInsideWeights", {-1, 4 * class_nums}); - ctx->SetOutputDim("BboxOutsideWeights", {-1, 4 * class_nums}); - ctx->SetOutputDim("MaxOverlapWithGT", {-1}); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "RpnRois"); - return phi::KernelKey(data_type, platform::CPUPlace()); - } -}; - -template -void Concat(const phi::CPUContext& context, - const phi::DenseTensor& in_tensor_a, - const phi::DenseTensor& in_tensor_b, - phi::DenseTensor* out_tensor) { - int axis = 0; - std::vector inputs; - inputs.emplace_back(in_tensor_a); - inputs.emplace_back(in_tensor_b); - math::ConcatFunctor concat_functor; - concat_functor(context, inputs, axis, out_tensor); -} - -template -std::vector> SampleFgBgGt(const phi::CPUContext& context, - phi::DenseTensor* iou, - const phi::DenseTensor& is_crowd, - const int batch_size_per_im, - const float fg_fraction, - const float fg_thresh, - const float bg_thresh_hi, - const float bg_thresh_lo, - std::minstd_rand engine, - const bool use_random, - const bool is_cascade_rcnn, - const phi::DenseTensor& rpn_rois) { - std::vector fg_inds; - std::vector bg_inds; - std::vector mapped_gt_inds; - int64_t gt_num = is_crowd.numel(); - const int* crowd_data = is_crowd.data(); - T* proposal_to_gt_overlaps = iou->data(); - int64_t row = iou->dims()[0]; - int64_t col = iou->dims()[1]; - float epsilon = 0.00001; - // Follow the Faster RCNN's implementation - for (int64_t i = 0; i < row; ++i) { - const T* v = proposal_to_gt_overlaps + i * col; - - T max_overlap = *std::max_element(v, v + col); - if ((i < gt_num) && (crowd_data[i])) { - max_overlap = -1.0; - } - if (max_overlap >= fg_thresh) { - // fg mapped gt label index - for (int64_t j = 0; j < col; ++j) { - T val = proposal_to_gt_overlaps[i * col + j]; - auto diff = std::abs(max_overlap - val); - if (diff < epsilon) { - fg_inds.emplace_back(i); - mapped_gt_inds.emplace_back(j); - break; - } - } - } else if ((max_overlap >= bg_thresh_lo) && (max_overlap < bg_thresh_hi)) { - bg_inds.emplace_back(i); - } else { - continue; - } - } - - std::vector> res; - if (is_cascade_rcnn) { - res.emplace_back(fg_inds); - res.emplace_back(bg_inds); - res.emplace_back(mapped_gt_inds); - } else { - // Reservoir Sampling - // sampling fg - std::uniform_real_distribution uniform(0, 1); - int fg_rois_per_im = std::floor(batch_size_per_im * fg_fraction); // NOLINT - int fg_rois_this_image = static_cast(fg_inds.size()); - int fg_rois_per_this_image = std::min(fg_rois_per_im, fg_rois_this_image); - if (use_random) { - const int64_t fg_size = static_cast(fg_inds.size()); - if (fg_size > fg_rois_per_this_image) { - for (int64_t i = fg_rois_per_this_image; i < fg_size; ++i) { - int rng_ind = std::floor(uniform(engine) * i); // NOLINT - if (rng_ind < fg_rois_per_this_image) { - std::iter_swap(fg_inds.begin() + rng_ind, fg_inds.begin() + i); - std::iter_swap(mapped_gt_inds.begin() + rng_ind, - mapped_gt_inds.begin() + i); - } - } - } - } - std::vector new_fg_inds(fg_inds.begin(), - fg_inds.begin() + fg_rois_per_this_image); - std::vector new_gt_inds( - mapped_gt_inds.begin(), - mapped_gt_inds.begin() + fg_rois_per_this_image); - // sampling bg - int bg_rois_per_image = batch_size_per_im - fg_rois_per_this_image; - int bg_rois_this_image = static_cast(bg_inds.size()); - int bg_rois_per_this_image = - std::min(bg_rois_per_image, bg_rois_this_image); - if (use_random) { - const int64_t bg_size = static_cast(bg_inds.size()); - if (bg_size > bg_rois_per_this_image) { - for (int64_t i = bg_rois_per_this_image; i < bg_size; ++i) { - int rng_ind = std::floor(uniform(engine) * i); // NOLINT - if (rng_ind < fg_rois_per_this_image) - std::iter_swap(bg_inds.begin() + rng_ind, bg_inds.begin() + i); - } - } - } - std::vector new_bg_inds(bg_inds.begin(), - bg_inds.begin() + bg_rois_per_this_image); - // - res.emplace_back(new_fg_inds); - res.emplace_back(new_bg_inds); - res.emplace_back(new_gt_inds); - } - - return res; -} - -template -void GatherBoxesLabels(const phi::CPUContext& context, - const phi::DenseTensor& boxes, - const phi::DenseTensor& max_overlap, - const phi::DenseTensor& gt_boxes, - const phi::DenseTensor& gt_classes, - const std::vector& fg_inds, - const std::vector& bg_inds, - const std::vector& gt_inds, - phi::DenseTensor* sampled_boxes, - phi::DenseTensor* sampled_labels, - phi::DenseTensor* sampled_gts, - phi::DenseTensor* sampled_max_overlap) { - int fg_num = static_cast(fg_inds.size()); - int bg_num = static_cast(bg_inds.size()); - phi::DenseTensor fg_inds_t, bg_inds_t, gt_box_inds_t, gt_label_inds_t; - int* fg_inds_data = fg_inds_t.mutable_data({fg_num}, context.GetPlace()); - int* bg_inds_data = bg_inds_t.mutable_data({bg_num}, context.GetPlace()); - int* gt_box_inds_data = - gt_box_inds_t.mutable_data({fg_num}, context.GetPlace()); - int* gt_label_inds_data = - gt_label_inds_t.mutable_data({fg_num}, context.GetPlace()); - std::copy(fg_inds.begin(), fg_inds.end(), fg_inds_data); - std::copy(bg_inds.begin(), bg_inds.end(), bg_inds_data); - std::copy(gt_inds.begin(), gt_inds.end(), gt_box_inds_data); - std::copy(gt_inds.begin(), gt_inds.end(), gt_label_inds_data); - - phi::DenseTensor fg_boxes, bg_boxes, fg_labels, bg_labels; - fg_boxes.mutable_data({fg_num, kBoxDim}, context.GetPlace()); - phi::funcs::CPUGather(context, boxes, fg_inds_t, &fg_boxes); - bg_boxes.mutable_data({bg_num, kBoxDim}, context.GetPlace()); - phi::funcs::CPUGather(context, boxes, bg_inds_t, &bg_boxes); - Concat(context, fg_boxes, bg_boxes, sampled_boxes); - phi::funcs::CPUGather(context, gt_boxes, gt_box_inds_t, sampled_gts); - fg_labels.mutable_data({fg_num}, context.GetPlace()); - phi::funcs::CPUGather(context, gt_classes, gt_label_inds_t, &fg_labels); - bg_labels.mutable_data({bg_num}, context.GetPlace()); - phi::funcs::set_constant(context, &bg_labels, static_cast(0)); - Concat(context, fg_labels, bg_labels, sampled_labels); - - phi::DenseTensor fg_max_overlap, bg_max_overlap; - fg_max_overlap.mutable_data({fg_num}, context.GetPlace()); - phi::funcs::CPUGather(context, max_overlap, fg_inds_t, &fg_max_overlap); - bg_max_overlap.mutable_data({bg_num}, context.GetPlace()); - phi::funcs::CPUGather(context, max_overlap, bg_inds_t, &bg_max_overlap); - Concat(context, fg_max_overlap, bg_max_overlap, sampled_max_overlap); -} - -template -std::vector SampleRoisForOneImage( - const phi::CPUContext& context, - const phi::DenseTensor& rpn_rois_in, - const phi::DenseTensor& gt_classes, - const phi::DenseTensor& is_crowd, - const phi::DenseTensor& gt_boxes, - const phi::DenseTensor& im_info, - const int batch_size_per_im, - const float fg_fraction, - const float fg_thresh, - const float bg_thresh_hi, - const float bg_thresh_lo, - const std::vector& bbox_reg_weights, - const int class_nums, - std::minstd_rand engine, - bool use_random, - bool is_cascade_rcnn, - bool is_cls_agnostic, - const phi::DenseTensor& max_overlap) { - // 1.1 map to original image - auto im_scale = im_info.data()[2]; - phi::DenseTensor rpn_rois; - rpn_rois.mutable_data(rpn_rois_in.dims(), context.GetPlace()); - const T* rpn_rois_in_dt = rpn_rois_in.data(); - T* rpn_rois_dt = rpn_rois.data(); - - for (int i = 0; i < rpn_rois.numel(); ++i) { - rpn_rois_dt[i] = rpn_rois_in_dt[i] / im_scale; - } - - int proposals_num = 1; - - if (is_cascade_rcnn) { - phi::DenseTensor keep; - FilterRoIs(context, rpn_rois, max_overlap, &keep); - phi::DenseTensor roi_filter; - // phi::DenseTensor box_filter; - if (keep.numel() == 0) { - phi::funcs::SetConstant set_zero; - roi_filter.mutable_data({proposals_num, kBoxDim}, context.GetPlace()); - set_zero(context, &roi_filter, static_cast(0)); - } else { - proposals_num = static_cast(keep.numel()); - roi_filter.mutable_data({proposals_num, kBoxDim}, context.GetPlace()); - phi::funcs::CPUGather(context, rpn_rois, keep, &roi_filter); - } - T* roi_filter_dt = roi_filter.data(); - memcpy(rpn_rois_dt, roi_filter_dt, roi_filter.numel() * sizeof(T)); - rpn_rois.Resize(roi_filter.dims()); - } else { - proposals_num = static_cast(rpn_rois.dims()[0]); - } - // 1.2 compute overlaps - proposals_num += static_cast(gt_boxes.dims()[0]); - - phi::DenseTensor proposal_to_gt_overlaps; - proposal_to_gt_overlaps.mutable_data({proposals_num, gt_boxes.dims()[0]}, - context.GetPlace()); - - phi::DenseTensor boxes; - boxes.mutable_data({proposals_num, kBoxDim}, context.GetPlace()); - Concat(context, gt_boxes, rpn_rois, &boxes); - BboxOverlaps(boxes, gt_boxes, &proposal_to_gt_overlaps); - - phi::DenseTensor proposal_with_max_overlap; - proposal_with_max_overlap.mutable_data({proposals_num}, - context.GetPlace()); - - MaxIoU(proposal_to_gt_overlaps, &proposal_with_max_overlap); - - // Generate proposal index - std::vector> fg_bg_gt = - SampleFgBgGt(context, - &proposal_to_gt_overlaps, - is_crowd, - batch_size_per_im, - fg_fraction, - fg_thresh, - bg_thresh_hi, - bg_thresh_lo, - engine, - use_random, - is_cascade_rcnn, - boxes); - std::vector fg_inds = fg_bg_gt[0]; - std::vector bg_inds = fg_bg_gt[1]; - std::vector mapped_gt_inds = fg_bg_gt[2]; // mapped_gt_labels - - // Gather boxes and labels - phi::DenseTensor sampled_boxes, sampled_labels, sampled_gts, - sampled_max_overlap; - int fg_num = static_cast(fg_inds.size()); - int bg_num = static_cast(bg_inds.size()); - int boxes_num = fg_num + bg_num; - framework::DDim bbox_dim({boxes_num, kBoxDim}); - sampled_boxes.mutable_data(bbox_dim, context.GetPlace()); - sampled_labels.mutable_data({boxes_num}, context.GetPlace()); - sampled_gts.mutable_data({fg_num, kBoxDim}, context.GetPlace()); - sampled_max_overlap.mutable_data({boxes_num}, context.GetPlace()); - GatherBoxesLabels(context, - boxes, - proposal_with_max_overlap, - gt_boxes, - gt_classes, - fg_inds, - bg_inds, - mapped_gt_inds, - &sampled_boxes, - &sampled_labels, - &sampled_gts, - &sampled_max_overlap); - - // Compute targets - phi::DenseTensor bbox_targets_single; - bbox_targets_single.mutable_data(bbox_dim, context.GetPlace()); - BoxToDelta(fg_num, - sampled_boxes, - sampled_gts, - bbox_reg_weights.data(), - false, - &bbox_targets_single); - - // Scale rois - phi::DenseTensor sampled_rois; - sampled_rois.mutable_data(sampled_boxes.dims(), context.GetPlace()); - auto sampled_rois_et = framework::EigenTensor::From(sampled_rois); - auto sampled_boxes_et = framework::EigenTensor::From(sampled_boxes); - sampled_rois_et = sampled_boxes_et * im_scale; - - // Expand box targets - phi::DenseTensor bbox_targets, bbox_inside_weights, bbox_outside_weights; - framework::DDim bbox_expand_dim({boxes_num, kBoxDim * class_nums}); - bbox_targets.mutable_data(bbox_expand_dim, context.GetPlace()); - bbox_inside_weights.mutable_data(bbox_expand_dim, context.GetPlace()); - bbox_outside_weights.mutable_data(bbox_expand_dim, context.GetPlace()); - phi::funcs::set_constant(context, &bbox_targets, static_cast(0.0)); - phi::funcs::set_constant(context, &bbox_inside_weights, static_cast(0.0)); - phi::funcs::set_constant(context, &bbox_outside_weights, static_cast(0.0)); - - auto* bbox_targets_single_data = bbox_targets_single.data(); - auto* sampled_labels_data = sampled_labels.data(); - auto* bbox_targets_data = bbox_targets.data(); - auto* bbox_inside_weights_data = bbox_inside_weights.data(); - auto* bbox_outside_weights_data = bbox_outside_weights.data(); - int width = kBoxDim * class_nums; - for (int64_t i = 0; i < boxes_num; ++i) { - int label = sampled_labels_data[i]; - if (label > 0) { - if (is_cls_agnostic) { - label = 1; - } - int dst_idx = static_cast(i * width + kBoxDim * label); - int src_idx = static_cast(kBoxDim * i); - bbox_targets_data[dst_idx] = bbox_targets_single_data[src_idx]; - bbox_targets_data[dst_idx + 1] = bbox_targets_single_data[src_idx + 1]; - bbox_targets_data[dst_idx + 2] = bbox_targets_single_data[src_idx + 2]; - bbox_targets_data[dst_idx + 3] = bbox_targets_single_data[src_idx + 3]; - bbox_inside_weights_data[dst_idx] = 1; - bbox_inside_weights_data[dst_idx + 1] = 1; - bbox_inside_weights_data[dst_idx + 2] = 1; - bbox_inside_weights_data[dst_idx + 3] = 1; - bbox_outside_weights_data[dst_idx] = 1; - bbox_outside_weights_data[dst_idx + 1] = 1; - bbox_outside_weights_data[dst_idx + 2] = 1; - bbox_outside_weights_data[dst_idx + 3] = 1; - } - } - std::vector res; - res.emplace_back(sampled_rois); - res.emplace_back(sampled_labels); - res.emplace_back(bbox_targets); - res.emplace_back(bbox_inside_weights); - res.emplace_back(bbox_outside_weights); - res.emplace_back(sampled_max_overlap); - return res; -} - -template -class GenerateProposalLabelsKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto* rpn_rois = context.Input("RpnRois"); - auto* gt_classes = context.Input("GtClasses"); - auto* is_crowd = context.Input("IsCrowd"); - auto* gt_boxes = context.Input("GtBoxes"); - auto* im_info = context.Input("ImInfo"); - - auto* rois = context.Output("Rois"); - auto* labels_int32 = context.Output("LabelsInt32"); - auto* bbox_targets = context.Output("BboxTargets"); - auto* bbox_inside_weights = - context.Output("BboxInsideWeights"); - auto* bbox_outside_weights = - context.Output("BboxOutsideWeights"); - auto* max_overlap_with_gt = - context.Output("MaxOverlapWithGT"); - - int batch_size_per_im = context.Attr("batch_size_per_im"); - float fg_fraction = context.Attr("fg_fraction"); - float fg_thresh = context.Attr("fg_thresh"); - float bg_thresh_hi = context.Attr("bg_thresh_hi"); - float bg_thresh_lo = context.Attr("bg_thresh_lo"); - std::vector bbox_reg_weights = - context.Attr>("bbox_reg_weights"); - int class_nums = context.Attr("class_nums"); - bool use_random = context.Attr("use_random"); - bool is_cascade_rcnn = context.Attr("is_cascade_rcnn"); - bool is_cls_agnostic = context.Attr("is_cls_agnostic"); - PADDLE_ENFORCE_EQ( - rpn_rois->lod().size(), - 1UL, - phi::errors::InvalidArgument( - "GenerateProposalLabelsOp rpn_rois needs 1 level of LoD. But " - "received level of LoD is [%d], LoD is [%s].", - rpn_rois->lod().size(), - rpn_rois->lod())); - PADDLE_ENFORCE_EQ( - gt_classes->lod().size(), - 1UL, - phi::errors::InvalidArgument( - "GenerateProposalLabelsOp gt_classes needs 1 level of LoD. But " - "received level of LoD is [%d], LoD is [%s].", - gt_classes->lod().size(), - gt_classes->lod())); - PADDLE_ENFORCE_EQ( - is_crowd->lod().size(), - 1UL, - phi::errors::InvalidArgument( - "GenerateProposalLabelsOp is_crowd needs 1 level of LoD. But " - "received level of LoD is [%d], LoD is [%s].", - is_crowd->lod().size(), - is_crowd->lod())); - PADDLE_ENFORCE_EQ( - gt_boxes->lod().size(), - 1UL, - phi::errors::InvalidArgument( - "GenerateProposalLabelsOp gt_boxes needs 1 level of LoD. But " - "received level of LoD is [%d], LoD is [%s].", - gt_boxes->lod().size(), - gt_boxes->lod())); - int64_t n = static_cast(rpn_rois->lod().back().size() - 1); - int64_t rois_num = rpn_rois->dims()[0]; - int64_t gts_num = gt_boxes->dims()[0]; - int64_t init_num = - is_cascade_rcnn ? rois_num + gts_num : n * batch_size_per_im; - - rois->mutable_data({init_num, kBoxDim}, context.GetPlace()); - labels_int32->mutable_data({init_num, 1}, context.GetPlace()); - bbox_targets->mutable_data({init_num, kBoxDim * class_nums}, - context.GetPlace()); - bbox_inside_weights->mutable_data({init_num, kBoxDim * class_nums}, - context.GetPlace()); - bbox_outside_weights->mutable_data({init_num, kBoxDim * class_nums}, - context.GetPlace()); - max_overlap_with_gt->Resize({init_num}); - max_overlap_with_gt->mutable_data(context.GetPlace()); - - std::random_device rnd; - std::minstd_rand engine; - int seed = static_cast(rnd()); - engine.seed(seed); - - framework::LoD lod; - std::vector lod0(1, 0); - - int64_t num_rois = 0; - auto& dev_ctx = context.device_context(); - - auto rpn_rois_lod = rpn_rois->lod().back(); - auto gt_classes_lod = gt_classes->lod().back(); - auto is_crowd_lod = is_crowd->lod().back(); - auto gt_boxes_lod = gt_boxes->lod().back(); - for (int i = 0; i < n; ++i) { - if (rpn_rois_lod[i] == rpn_rois_lod[i + 1]) { - lod0.emplace_back(num_rois); - continue; - } - phi::DenseTensor rpn_rois_slice = - rpn_rois->Slice(static_cast(rpn_rois_lod[i]), - static_cast(rpn_rois_lod[i + 1])); - phi::DenseTensor gt_classes_slice = - gt_classes->Slice(static_cast(gt_classes_lod[i]), - static_cast(gt_classes_lod[i + 1])); - phi::DenseTensor is_crowd_slice = - is_crowd->Slice(static_cast(is_crowd_lod[i]), - static_cast(is_crowd_lod[i + 1])); - phi::DenseTensor gt_boxes_slice = - gt_boxes->Slice(static_cast(gt_boxes_lod[i]), - static_cast(gt_boxes_lod[i + 1])); - phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1); - phi::DenseTensor max_overlap_slice; - if (is_cascade_rcnn) { - auto* max_overlap = context.Input("MaxOverlap"); - max_overlap_slice = - max_overlap->Slice(static_cast(rpn_rois_lod[i]), - static_cast(rpn_rois_lod[i + 1])); - } else { - max_overlap_slice.mutable_data({rpn_rois_slice.dims()[0]}, - context.GetPlace()); - } - std::vector tensor_output = - SampleRoisForOneImage(dev_ctx, - rpn_rois_slice, - gt_classes_slice, - is_crowd_slice, - gt_boxes_slice, - im_info_slice, - batch_size_per_im, - fg_fraction, - fg_thresh, - bg_thresh_hi, - bg_thresh_lo, - bbox_reg_weights, - class_nums, - engine, - use_random, - is_cascade_rcnn, - is_cls_agnostic, - max_overlap_slice); - phi::DenseTensor sampled_rois = tensor_output[0]; - phi::DenseTensor sampled_labels_int32 = tensor_output[1]; - phi::DenseTensor sampled_bbox_targets = tensor_output[2]; - phi::DenseTensor sampled_bbox_inside_weights = tensor_output[3]; - phi::DenseTensor sampled_bbox_outside_weights = tensor_output[4]; - phi::DenseTensor sampled_max_overlap = tensor_output[5]; - - AppendRois(rois, kBoxDim * num_rois, &sampled_rois); - AppendRois(labels_int32, num_rois, &sampled_labels_int32); - int64_t offset = kBoxDim * num_rois * class_nums; - AppendRois(bbox_targets, offset, &sampled_bbox_targets); - AppendRois(bbox_inside_weights, offset, &sampled_bbox_inside_weights); - AppendRois( - bbox_outside_weights, offset, &sampled_bbox_outside_weights); - AppendRois(max_overlap_with_gt, num_rois, &sampled_max_overlap); - - num_rois += sampled_rois.dims()[0]; - lod0.emplace_back(num_rois); - } - - lod.emplace_back(lod0); - rois->set_lod(lod); - labels_int32->set_lod(lod); - bbox_targets->set_lod(lod); - bbox_inside_weights->set_lod(lod); - bbox_outside_weights->set_lod(lod); - rois->Resize({num_rois, kBoxDim}); - labels_int32->Resize({num_rois, 1}); - bbox_targets->Resize({num_rois, kBoxDim * class_nums}); - bbox_inside_weights->Resize({num_rois, kBoxDim * class_nums}); - bbox_outside_weights->Resize({num_rois, kBoxDim * class_nums}); - max_overlap_with_gt->Resize({num_rois}); - max_overlap_with_gt->set_lod(lod); - } -}; - -class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput( - "RpnRois", - "(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape " - "[N, 4]. " - "N is the number of the GenerateProposalOp's output, " - "each element is a bounding box with [xmin, ymin, xmax, ymax] format."); - AddInput("GtClasses", - "(phi::DenseTensor), This input is a 2D phi::DenseTensor with " - "shape [M, 1]. " - "M is the number of groundtruth, " - "each element is a class label of groundtruth."); - AddInput( - "IsCrowd", - "(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape " - "[M, 1]. " - "M is the number of groundtruth, " - "each element is a flag indicates whether a groundtruth is crowd."); - AddInput( - "GtBoxes", - "(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape " - "[M, 4]. " - "M is the number of groundtruth, " - "each element is a bounding box with [xmin, ymin, xmax, ymax] format."); - AddInput("ImInfo", - "(Tensor), This input is a 2D Tensor with shape [B, 3]. " - "B is the number of input images, " - "each element consists of im_height, im_width, im_scale."); - AddInput("MaxOverlap", - "(phi::DenseTensor), This input is a 1D phi::DenseTensor with " - "shape [N]." - "N is the number of Input(RpnRois), " - "each element is the maximum overlap between " - "the proposal RoI and ground-truth.") - .AsDispensable(); - - AddOutput( - "Rois", - "(phi::DenseTensor), This output is a 2D phi::DenseTensor with shape " - "[P, 4]. " - "P usuall equal to batch_size_per_im * batch_size, " - "each element is a bounding box with [xmin, ymin, xmax, ymax] format."); - AddOutput("LabelsInt32", - "(phi::DenseTensor), This output is a 2D phi::DenseTensor with " - "shape [P, 1], " - "each element represents a class label of a roi"); - AddOutput("BboxTargets", - "(phi::DenseTensor), This output is a 2D phi::DenseTensor with " - "shape [P, 4 * " - "class_nums], " - "each element represents a box label of a roi"); - AddOutput( - "BboxInsideWeights", - "(phi::DenseTensor), This output is a 2D phi::DenseTensor with shape " - "[P, 4 * " - "class_nums], " - "each element indicates whether a box should contribute to loss."); - AddOutput( - "BboxOutsideWeights", - "(phi::DenseTensor), This output is a 2D phi::DenseTensor with shape " - "[P, 4 * " - "class_nums], " - "each element indicates whether a box should contribute to loss."); - AddOutput("MaxOverlapWithGT", - "(phi::DenseTensor), This output is a 1D phi::DenseTensor with " - "shape [P], " - "each element indicates the maxoverlap " - "between output RoIs and ground-truth. " - "The output RoIs may include ground-truth " - "and the output maxoverlap may contain 1."); - - AddAttr("batch_size_per_im", "Batch size of rois per images."); - AddAttr("fg_fraction", - "Foreground fraction in total batch_size_per_im."); - AddAttr( - "fg_thresh", - "Overlap threshold which is used to chose foreground sample."); - AddAttr("bg_thresh_hi", - "Overlap threshold upper bound which is used to chose " - "background sample."); - AddAttr("bg_thresh_lo", - "Overlap threshold lower bound which is used to chose " - "background sample."); - AddAttr>("bbox_reg_weights", "Box regression weights."); - AddAttr("class_nums", "Class number."); - AddAttr( - "use_random", - "Use random sampling to choose foreground and background boxes.") - .SetDefault(true); - AddAttr("is_cascade_rcnn", - "cascade rcnn sampling policy changed from stage 2.") - .SetDefault(false); - AddAttr( - "is_cls_agnostic", - "the box regress will only include fg and bg locations if set true ") - .SetDefault(false); - - AddComment(R"DOC( -This operator can be, for given the GenerateProposalOp output bounding boxes and groundtruth, -to sample foreground boxes and background boxes, and compute loss target. - -RpnRois is the output boxes of RPN and was processed by generate_proposal_op, these boxes -were combined with groundtruth boxes and sampled according to batch_size_per_im and fg_fraction, -If an instance with a groundtruth overlap greater than fg_thresh, then it was considered as a foreground sample. -If an instance with a groundtruth overlap greater than bg_thresh_lo and lower than bg_thresh_hi, -then it was considered as a background sample. -After all foreground and background boxes are chosen (so called Rois), -then we apply random sampling to make sure -the number of foreground boxes is no more than batch_size_per_im * fg_fraction. - -For each box in Rois, we assign the classification (class label) and regression targets (box label) to it. -Finally BboxInsideWeights and BboxOutsideWeights are used to specify whether it would contribute to training loss. - )DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR( - generate_proposal_labels, - ops::GenerateProposalLabelsOp, - ops::GenerateProposalLabelsOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); -PD_REGISTER_STRUCT_KERNEL(generate_proposal_labels, - CPU, - ALL_LAYOUT, - ops::GenerateProposalLabelsKernel, - float, - double) {} - -REGISTER_OP_VERSION(generate_proposal_labels) - .AddCheckpoint( - R"ROC( - Upgrade of output [MaxOverlapWithGT])ROC", - paddle::framework::compatible::OpVersionDesc().NewOutput( - "MaxOverlapWithGT", - "The maxoverlap between output RoIs and ground-truth.")) - .AddCheckpoint( - R"ROC( - Upgrade generate_proposal_labels add a new input [MaxOverlap])ROC", - paddle::framework::compatible::OpVersionDesc().NewInput( - "MaxOverlap", "MaxOverlap is dispensable.")); diff --git a/paddle/fluid/operators/detection/mask_util.cc b/paddle/fluid/operators/detection/mask_util.cc deleted file mode 100644 index 5b4dc92f4f6af8..00000000000000 --- a/paddle/fluid/operators/detection/mask_util.cc +++ /dev/null @@ -1,242 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/detection/mask_util.h" - -#include -#include - -#include "paddle/fluid/memory/memory.h" - -namespace paddle { -namespace operators { - -uint32_t UMax(uint32_t a, uint32_t b) { return (a > b) ? a : b; } - -static inline int Compare(const void* a, const void* b) { - uint32_t c = *(reinterpret_cast(a)); - uint32_t d = *(reinterpret_cast(b)); - return c > d ? 1 : c < d ? -1 : 0; -} - -void Decode(const uint32_t* cnts, int m, uint8_t* mask) { - uint8_t v = 0; - for (int j = 0; j < m; j++) { - for (uint32_t k = 0; k < cnts[j]; k++) { - *(mask++) = v; - } - v = !v; - } -} - -typedef uint32_t uint; -void Poly2Mask(const float* xy, int k, int h, int w, uint8_t* mask) { - int j = 0, m = 0; - double scale = 5; - int *x = nullptr, *y = nullptr, *u = nullptr, *v = nullptr; - uint *a = nullptr, *b = nullptr; - platform::CPUPlace cpu; - auto xptr = memory::Alloc(cpu, sizeof(int) * (k + 1) * 2); - x = reinterpret_cast(xptr->ptr()); - y = x + (k + 1); - - for (j = 0; j < k; j++) - x[j] = static_cast(std::lround(scale * xy[j * 2 + 0])); - x[k] = x[0]; - for (j = 0; j < k; j++) - y[j] = static_cast(std::lround(scale * xy[j * 2 + 1])); - y[k] = y[0]; - for (j = 0; j < k; j++) { - m += static_cast(UMax(abs(x[j] - x[j + 1]), abs(y[j] - y[j + 1])) + 1); - } - auto vptr = memory::Alloc(cpu, sizeof(int) * m * 2); - u = reinterpret_cast(vptr->ptr()); - v = u + m; - m = 0; - for (j = 0; j < k; j++) { - int xs = x[j], xe = x[j + 1], ys = y[j], ye = y[j + 1], dx = 0, dy = 0, - t = 0, d = 0; - int flip = 0; - double s = NAN; - dx = abs(xe - xs); - dy = abs(ys - ye); - flip = (dx >= dy && xs > xe) || (dx < dy && ys > ye); - if (flip) { - t = xs; - xs = xe; - xe = t; - t = ys; - ys = ye; - ye = t; - } - if (dx >= dy) { - s = dx == 0 ? 0 : static_cast(ye - ys) / dx; - for (d = 0; d <= dx; d++) { - t = flip ? dx - d : d; - u[m] = t + xs; - v[m] = static_cast(std::lround(ys + s * t)); - m++; - } - } else { - s = dy == 0 ? 0 : static_cast(xe - xs) / dy; - for (d = 0; d <= dy; d++) { - t = flip ? dy - d : d; - v[m] = t + ys; - u[m] = static_cast(std::lround(xs + s * t)); - m++; - } - } - } - /* get points along y-boundary and downsample */ - k = m; - m = 0; - double xd = NAN, yd = NAN; - auto xyptr = memory::Alloc(cpu, sizeof(int) * k * 2); - x = reinterpret_cast(xyptr->ptr()); - y = x + k; - for (j = 1; j < k; j++) { - if (u[j] != u[j - 1]) { - xd = static_cast(u[j] < u[j - 1] ? u[j] : u[j] - 1); - xd = (xd + .5) / scale - .5; - if (floor(xd) != xd || xd < 0 || xd > w - 1) continue; - yd = static_cast(v[j] < v[j - 1] ? v[j] : v[j - 1]); - yd = (yd + .5) / scale - .5; - if (yd < 0) - yd = 0; - else if (yd > h) - yd = h; - yd = ceil(yd); - x[m] = static_cast(xd); - y[m] = static_cast(yd); - m++; - } - } - /* compute rle encoding given y-boundary points */ - k = m; - auto aptr = memory::Alloc(cpu, sizeof(uint) * (k + 1)); - a = reinterpret_cast(aptr->ptr()); - for (j = 0; j < k; j++) a[j] = static_cast(x[j] * h + y[j]); - a[k++] = static_cast(h * w); - - qsort(a, k, sizeof(uint), Compare); - uint p = 0; - for (j = 0; j < k; j++) { - uint t = a[j]; - a[j] -= p; - p = t; - } - auto bptr = memory::Alloc(cpu, sizeof(uint32_t) * k); - b = reinterpret_cast(bptr->ptr()); - j = m = 0; - b[m++] = a[j++]; - while (j < k) { - if (a[j] > 0) { - b[m++] = a[j++]; - } else { - j++; - if (j < k) b[m - 1] += a[j++]; - } - } - - // convert to mask - auto mskptr = memory::Alloc(cpu, sizeof(uint8_t) * h * w); - uint8_t* msk = reinterpret_cast(mskptr->ptr()); - Decode(b, m, msk); - - for (int ii = 0; ii < h; ++ii) { - for (int jj = 0; jj < w; ++jj) { - mask[ii * w + jj] = msk[jj * h + ii]; - } - } -} - -void Poly2Boxes(const std::vector>>& polys, - float* boxes) { - // lists - for (size_t i = 0; i < polys.size(); ++i) { - float x0 = std::numeric_limits::max(); - float x1 = std::numeric_limits::min(); - float y0 = std::numeric_limits::max(); - float y1 = std::numeric_limits::min(); - // each list may have more than one polys - for (const auto& item : polys[i]) { - for (size_t k = 0; k < item.size() / 2; ++k) { - x0 = std::min(x0, item[2 * k]); - x1 = std::max(x1, item[2 * k]); - y0 = std::min(y0, item[2 * k + 1]); - y1 = std::max(y1, item[2 * k + 1]); - } - } - boxes[i * 4] = x0; - boxes[i * 4 + 1] = y0; - boxes[i * 4 + 2] = x1; - boxes[i * 4 + 3] = y1; - } -} - -void Polys2MaskWrtBox(const std::vector>& polygons, - const float* box, - int M, - uint8_t* mask) { - float w = box[2] - box[0]; - float h = box[3] - box[1]; - w = std::max(w, static_cast(1.)); - h = std::max(h, static_cast(1.)); - - // short-circuit for case "polygons.size() == 1" - if (polygons.size() == 1UL) { - int k = static_cast(polygons[0].size() / 2); - std::vector p; - for (int j = 0; j < k; ++j) { - float pw = (polygons[0][2 * j] - box[0]) * M / w; // NOLINT - float ph = (polygons[0][2 * j + 1] - box[1]) * M / h; // NOLINT - p.push_back(pw); - p.push_back(ph); - } - Poly2Mask(p.data(), k, M, M, mask); - - return; - } - - uint8_t* msk = reinterpret_cast( - malloc(M * M * polygons.size() * sizeof(uint8_t))); // NOLINT - - for (size_t i = 0; i < polygons.size(); ++i) { - int k = static_cast(polygons[i].size() / 2); - std::vector p; - for (int j = 0; j < k; ++j) { - float pw = (polygons[i][2 * j] - box[0]) * M / w; // NOLINT - float ph = (polygons[i][2 * j + 1] - box[1]) * M / h; // NOLINT - p.push_back(pw); - p.push_back(ph); - } - uint8_t* msk_i = msk + i * M * M; - Poly2Mask(p.data(), k, M, M, msk_i); - } - - for (size_t i = 0; i < polygons.size(); ++i) { - uint8_t* msk_i = msk + i * M * M; - for (int j = 0; j < M * M; ++j) { - if (i == 0) { - mask[j] = msk_i[j]; - } else { - mask[j] = (mask[j] + msk_i[j]) > 0 ? 1 : 0; - } - } - } - free(msk); // NOLINT -} - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/detection/mask_util.h b/paddle/fluid/operators/detection/mask_util.h deleted file mode 100644 index 587a9c53794def..00000000000000 --- a/paddle/fluid/operators/detection/mask_util.h +++ /dev/null @@ -1,35 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include - -#include - -#include "paddle/utils/test_macros.h" - -namespace paddle { -namespace operators { - -TEST_API void Poly2Mask(const float* ploy, int k, int h, int w, uint8_t* mask); - -TEST_API void Poly2Boxes( - const std::vector>>& polys, float* boxes); - -TEST_API void Polys2MaskWrtBox(const std::vector>& polygons, - const float* box, - int M, - uint8_t* mask); -} // namespace operators -} // namespace paddle diff --git a/test/cpp/fluid/CMakeLists.txt b/test/cpp/fluid/CMakeLists.txt index 76aa8a6635225f..d07156f16d57c8 100644 --- a/test/cpp/fluid/CMakeLists.txt +++ b/test/cpp/fluid/CMakeLists.txt @@ -7,7 +7,7 @@ if(WITH_CINN) add_subdirectory(cinn) endif() add_subdirectory(controlflow) -add_subdirectory(detection) + if(WITH_DLNNE) add_subdirectory(dlnne) endif() diff --git a/test/cpp/fluid/detection/CMakeLists.txt b/test/cpp/fluid/detection/CMakeLists.txt deleted file mode 100644 index 6a69241e7846ef..00000000000000 --- a/test/cpp/fluid/detection/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -paddle_test(mask_util_test SRCS mask_util_test.cc) - -if(WITH_ONNXRUNTIME AND WIN32) - # Copy onnxruntime for some c++ test in Windows, since the test will - # be build only in CI, so suppose the generator in Windows is Ninja. - copy_onnx(mask_util_test) -endif() diff --git a/test/cpp/fluid/detection/mask_util_test.cc b/test/cpp/fluid/detection/mask_util_test.cc deleted file mode 100644 index 274850c0a67dcd..00000000000000 --- a/test/cpp/fluid/detection/mask_util_test.cc +++ /dev/null @@ -1,126 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/detection/mask_util.h" - -#include - -#include "paddle/fluid/memory/memory.h" - -namespace paddle { -namespace operators { - -template -void Compare(const T* a, const T* b, const int n) { - for (int i = 0; i < n; i++) { - EXPECT_EQ(a[i], b[i]); - } -} - -TEST(MaskUtil, Poly2MaskTest) { - float polys[] = {// NOLINT - 1.97f, - 1.88f, - 5.81f, - 1.88f, - 1.69f, - 6.53f, - 5.94f, - 6.38f, - 1.97f, - 1.88f}; - int h = 8, w = 8; - int k = 5; // length(polys) / 2 - // clang-format off - uint8_t expect_mask[] = { // NOLINT - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 1, 1, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 1, 0, 0, 0, 0, - 0, 0, 1, 1, 1, 0, 0, 0, - 0, 0, 1, 1, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 - }; - // clang-format on - - // the ground-truth mask is computed by coco API: - // - // import pycocotools.mask as mask_util - // import numpy as np - // segm = [1.97, 1.88, 5.81, 1.88, 1.69, 6.53, 5.94, 6.38, 1.97, 1.88] - // rles = mask_util.frPyObjects([segm], im_h, im_w) - // mask = mask_util.decode(rles) - // print mask - platform::CPUPlace cpu; - auto allocation = memory::Alloc(cpu, sizeof(expect_mask)); - uint8_t* mask = reinterpret_cast(allocation->ptr()); - Poly2Mask(polys, k, h, w, mask); - Compare(expect_mask, mask, h * w); -} - -TEST(MaskUtil, Poly2BoxesTest) { - // clang-format off - std::vector>> polys = { - {{1.97f, 1.88f, 5.81f, 1.88f, 1.69f, 6.53f, 5.94f, 6.38f, 1.97f, 1.88f}}, - {{2.97f, 1.88f, 3.81f, 1.68f, 1.69f, 6.63f, 6.94f, 6.58f, 2.97f, 0.88f}} - }; - float expect_boxes[] = { // NOLINT - 1.69f, 1.88f, 5.94f, 6.53f, - 1.69f, 0.88f, 6.94f, 6.63f - }; - // clang-format on - - platform::CPUPlace cpu; - auto allocation = memory::Alloc(cpu, sizeof(expect_boxes)); - float* boxes = reinterpret_cast(allocation->ptr()); - Poly2Boxes(polys, boxes); - Compare(expect_boxes, boxes, 8); -} - -TEST(MaskUtil, Polys2MaskWrtBoxTest) { - // clang-format off - std::vector>> polys = {{ - {1.97f, 1.88f, 5.81f, 1.88f, 1.69f, 6.53f, 5.94f, 6.38f, 1.97f, 1.88f}, - {2.97f, 1.88f, 3.81f, 1.68f, 1.69f, 6.63f, 6.94f, 6.58f, 2.97f, 0.88f}}}; - float expect_boxes[] = { // NOLINT - 1.69f, 0.88f, 6.94f, 6.63f - }; - uint8_t expect_mask[] = { // NOLINT - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 1, 1, 1, 1, 0, 0, - 0, 0, 1, 1, 1, 0, 0, 0, - 0, 0, 1, 1, 1, 0, 0, 0, - 0, 0, 1, 1, 1, 0, 0, 0, - 0, 1, 1, 1, 1, 1, 0, 0, - 0, 1, 1, 1, 1, 1, 1, 0, - 1, 1, 1, 1, 1, 1, 1, 1 - }; - // clang-format on - - platform::CPUPlace cpu; - auto allocation = memory::Alloc(cpu, sizeof(expect_boxes)); - float* boxes = reinterpret_cast(allocation->ptr()); - Poly2Boxes(polys, boxes); - Compare(expect_boxes, boxes, 4); - - auto allocation_mask = memory::Alloc(cpu, sizeof(expect_mask)); - uint8_t* mask = reinterpret_cast(allocation_mask->ptr()); - int M = 8; - Polys2MaskWrtBox(polys[0], expect_boxes, M, mask); - Compare(expect_mask, mask, M * M); -} - -} // namespace operators -} // namespace paddle diff --git a/test/legacy_test/test_box_decoder_and_assign_op.py b/test/legacy_test/test_box_decoder_and_assign_op.py deleted file mode 100644 index 555e5fbd2c6f7a..00000000000000 --- a/test/legacy_test/test_box_decoder_and_assign_op.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import numpy as np -from op_test import OpTest - - -def box_decoder_and_assign(deltas, weights, boxes, box_score, box_clip): - boxes = boxes.astype(deltas.dtype, copy=False) - widths = boxes[:, 2] - boxes[:, 0] + 1.0 - heights = boxes[:, 3] - boxes[:, 1] + 1.0 - ctr_x = boxes[:, 0] + 0.5 * widths - ctr_y = boxes[:, 1] + 0.5 * heights - wx, wy, ww, wh = weights - dx = deltas[:, 0::4] * wx - dy = deltas[:, 1::4] * wy - dw = deltas[:, 2::4] * ww - dh = deltas[:, 3::4] * wh - # Prevent sending too large values into np.exp() - dw = np.minimum(dw, box_clip) - dh = np.minimum(dh, box_clip) - pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] - pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] - pred_w = np.exp(dw) * widths[:, np.newaxis] - pred_h = np.exp(dh) * heights[:, np.newaxis] - pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype) - # x1 - pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w - # y1 - pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h - # x2 (note: "- 1" is correct; don't be fooled by the asymmetry) - pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1 - # y2 (note: "- 1" is correct; don't be fooled by the asymmetry) - pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1 - - output_assign_box = [] - for ino in range(len(pred_boxes)): - rank = np.argsort(-box_score[ino]) - maxidx = rank[0] - if maxidx == 0: - maxidx = rank[1] - beg_pos = maxidx * 4 - end_pos = maxidx * 4 + 4 - output_assign_box.append(pred_boxes[ino, beg_pos:end_pos]) - output_assign_box = np.array(output_assign_box) - - return pred_boxes, output_assign_box - - -class TestBoxDecoderAndAssignOpWithLoD(OpTest): - def test_check_output(self): - # NODE(yjjiang11): This op will be deprecated. - self.check_output(check_dygraph=False) - - def setUp(self): - self.op_type = "box_decoder_and_assign" - lod = [[4, 8, 8]] - num_classes = 10 - prior_box = np.random.random((20, 4)).astype('float32') - prior_box_var = np.array([0.1, 0.1, 0.2, 0.2], dtype=np.float32) - target_box = np.random.random((20, 4 * num_classes)).astype('float32') - box_score = np.random.random((20, num_classes)).astype('float32') - box_clip = 4.135 - output_box, output_assign_box = box_decoder_and_assign( - target_box, prior_box_var, prior_box, box_score, box_clip - ) - - self.inputs = { - 'PriorBox': (prior_box, lod), - 'PriorBoxVar': prior_box_var, - 'TargetBox': (target_box, lod), - 'BoxScore': (box_score, lod), - } - self.attrs = {'box_clip': box_clip} - self.outputs = { - 'DecodeBox': output_box, - 'OutputAssignBox': output_assign_box, - } - - -if __name__ == '__main__': - unittest.main() diff --git a/test/legacy_test/test_density_prior_box_op.py b/test/legacy_test/test_density_prior_box_op.py deleted file mode 100644 index 9d621dc551111f..00000000000000 --- a/test/legacy_test/test_density_prior_box_op.py +++ /dev/null @@ -1,173 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import math -import unittest - -import numpy as np -from op_test import OpTest - - -class TestDensityPriorBoxOp(OpTest): - def set_data(self): - self.init_test_params() - self.init_test_input() - self.init_test_output() - self.inputs = {'Input': self.input, 'Image': self.image} - - self.attrs = { - 'variances': self.variances, - 'clip': self.clip, - 'step_w': self.step_w, - 'step_h': self.step_h, - 'offset': self.offset, - 'densities': self.densities, - 'fixed_sizes': self.fixed_sizes, - 'fixed_ratios': self.fixed_ratios, - 'flatten_to_2d': self.flatten_to_2d, - } - self.outputs = {'Boxes': self.out_boxes, 'Variances': self.out_var} - - def test_check_output(self): - self.check_output() - - def setUp(self): - self.op_type = "density_prior_box" - self.set_data() - - def set_density(self): - self.densities = [4, 2, 1] - self.fixed_sizes = [32.0, 64.0, 128.0] - self.fixed_ratios = [1.0] - self.layer_w = 17 - self.layer_h = 17 - self.image_w = 533 - self.image_h = 533 - self.flatten_to_2d = False - - def init_test_params(self): - self.set_density() - - self.step_w = float(self.image_w) / float(self.layer_w) - self.step_h = float(self.image_h) / float(self.layer_h) - - self.input_channels = 2 - self.image_channels = 3 - self.batch_size = 10 - - self.variances = [0.1, 0.1, 0.2, 0.2] - self.variances = np.array(self.variances, dtype=np.float64).flatten() - - self.clip = True - self.num_priors = 0 - if len(self.fixed_sizes) > 0 and len(self.densities) > 0: - for density in self.densities: - if len(self.fixed_ratios) > 0: - self.num_priors += len(self.fixed_ratios) * ( - pow(density, 2) - ) - self.offset = 0.5 - - def init_test_input(self): - self.image = np.random.random( - (self.batch_size, self.image_channels, self.image_w, self.image_h) - ).astype('float32') - - self.input = np.random.random( - (self.batch_size, self.input_channels, self.layer_w, self.layer_h) - ).astype('float32') - - def init_test_output(self): - out_dim = (self.layer_h, self.layer_w, self.num_priors, 4) - out_boxes = np.zeros(out_dim).astype('float32') - out_var = np.zeros(out_dim).astype('float32') - - step_average = int((self.step_w + self.step_h) * 0.5) - for h in range(self.layer_h): - for w in range(self.layer_w): - idx = 0 - c_x = (w + self.offset) * self.step_w - c_y = (h + self.offset) * self.step_h - # Generate density prior boxes with fixed size - for density, fixed_size in zip( - self.densities, self.fixed_sizes - ): - if len(self.fixed_ratios) > 0: - for ar in self.fixed_ratios: - shift = int(step_average / density) - box_width_ratio = fixed_size * math.sqrt(ar) - box_height_ratio = fixed_size / math.sqrt(ar) - for di in range(density): - for dj in range(density): - c_x_temp = ( - c_x - - step_average / 2.0 - + shift / 2.0 - + dj * shift - ) - c_y_temp = ( - c_y - - step_average / 2.0 - + shift / 2.0 - + di * shift - ) - out_boxes[h, w, idx, :] = [ - max( - (c_x_temp - box_width_ratio / 2.0) - / self.image_w, - 0, - ), - max( - (c_y_temp - box_height_ratio / 2.0) - / self.image_h, - 0, - ), - min( - (c_x_temp + box_width_ratio / 2.0) - / self.image_w, - 1, - ), - min( - (c_y_temp + box_height_ratio / 2.0) - / self.image_h, - 1, - ), - ] - idx += 1 - if self.clip: - out_boxes = np.clip(out_boxes, 0.0, 1.0) - out_var = np.tile( - self.variances, (self.layer_h, self.layer_w, self.num_priors, 1) - ) - self.out_boxes = out_boxes.astype('float32') - self.out_var = out_var.astype('float32') - if self.flatten_to_2d: - self.out_boxes = self.out_boxes.reshape((-1, 4)) - self.out_var = self.out_var.reshape((-1, 4)) - - -class TestDensityPriorBox(TestDensityPriorBoxOp): - def set_density(self): - self.densities = [3, 4] - self.fixed_sizes = [1.0, 2.0] - self.fixed_ratios = [1.0] - self.layer_w = 32 - self.layer_h = 32 - self.image_w = 40 - self.image_h = 40 - self.flatten_to_2d = True - - -if __name__ == '__main__': - unittest.main() diff --git a/test/legacy_test/test_generate_mask_labels_op.py b/test/legacy_test/test_generate_mask_labels_op.py deleted file mode 100644 index 86ab3cb0888793..00000000000000 --- a/test/legacy_test/test_generate_mask_labels_op.py +++ /dev/null @@ -1,317 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import math -import unittest - -import numpy as np - -''' -# Equivalent code -rles = mask_util.frPyObjects([segm], im_h, im_w) -mask = mask_util.decode(rles) -''' - - -def decode(cnts, m): - v = 0 - mask = [] - for j in range(m): - for k in range(cnts[j]): - mask.append(v) - v = 1 - v - return mask - - -def poly2mask(xy, k, h, w): - scale = 5.0 - x = [int(scale * p + 0.5) for p in xy[::2]] - x = x + [x[0]] - y = [int(scale * p + 0.5) for p in xy[1::2]] - y = y + [y[0]] - m = sum( - [ - int(max(abs(x[j] - x[j + 1]), abs(y[j] - y[j + 1]))) + 1 - for j in range(k) - ] - ) - - u, v = [], [] - for j in range(k): - xs = x[j] - xe = x[j + 1] - ys = y[j] - ye = y[j + 1] - dx = abs(xe - xs) - dy = abs(ys - ye) - flip = (dx >= dy and xs > xe) or (dx < dy and ys > ye) - if flip: - xs, xe = xe, xs - ys, ye = ye, ys - - if dx >= dy: - if dx == 0: - assert ye - ys == 0 - s = 0 if dx == 0 else float(ye - ys) / dx - else: - if dy == 0: - assert xe - xs == 0 - s = 0 if dy == 0 else float(xe - xs) / dy - - if dx >= dy: - ts = [dx - d if flip else d for d in range(dx + 1)] - u.extend([xs + t for t in ts]) - v.extend([int(ys + s * t + 0.5) for t in ts]) - else: - ts = [dy - d if flip else d for d in range(dy + 1)] - v.extend([t + ys for t in ts]) - u.extend([int(xs + s * t + 0.5) for t in ts]) - - k = len(u) - x = np.zeros((k), np.int_) - y = np.zeros((k), np.int_) - m = 0 - for j in range(1, k): - if u[j] != u[j - 1]: - xd = float(u[j] if (u[j] < u[j - 1]) else (u[j] - 1)) - xd = (xd + 0.5) / scale - 0.5 - if math.floor(xd) != xd or xd < 0 or xd > (w - 1): - continue - yd = float(v[j] if v[j] < v[j - 1] else v[j - 1]) - yd = (yd + 0.5) / scale - 0.5 - yd = math.ceil(0 if yd < 0 else (h if yd > h else yd)) - x[m] = int(xd) - y[m] = int(yd) - m += 1 - k = m - a = [int(x[i] * h + y[i]) for i in range(k)] - a.append(h * w) - a.sort() - b = [0] + a[: len(a) - 1] - a = [c - d for (c, d) in zip(a, b)] - - k += 1 - b = [0 for i in range(k)] - b[0] = a[0] - m, j = 1, 1 - while j < k: - if a[j] > 0: - b[m] = a[j] - m += 1 - j += 1 - else: - j += 1 - if j < k: - b[m - 1] += a[j] - j += 1 - mask = decode(b, m) - mask = np.array(mask, dtype=np.int_).reshape((w, h)) - mask = mask.transpose((1, 0)) - return mask - - -def polys_to_boxes(polys): - """Convert a list of polygons into an array of tight bounding boxes.""" - boxes_from_polys = np.zeros((len(polys), 4), dtype=np.float32) - for i in range(len(polys)): - poly = polys[i] - x0 = min(min(p[::2]) for p in poly) - x1 = max(max(p[::2]) for p in poly) - y0 = min(min(p[1::2]) for p in poly) - y1 = max(max(p[1::2]) for p in poly) - boxes_from_polys[i, :] = [x0, y0, x1, y1] - return boxes_from_polys - - -def bbox_overlaps(boxes, query_boxes): - N = boxes.shape[0] - K = query_boxes.shape[0] - overlaps = np.zeros((N, K), dtype=boxes.dtype) - for k in range(K): - box_area = (query_boxes[k, 2] - query_boxes[k, 0] + 1) * ( - query_boxes[k, 3] - query_boxes[k, 1] + 1 - ) - for n in range(N): - iw = ( - min(boxes[n, 2], query_boxes[k, 2]) - - max(boxes[n, 0], query_boxes[k, 0]) - + 1 - ) - if iw > 0: - ih = ( - min(boxes[n, 3], query_boxes[k, 3]) - - max(boxes[n, 1], query_boxes[k, 1]) - + 1 - ) - if ih > 0: - ua = float( - (boxes[n, 2] - boxes[n, 0] + 1) - * (boxes[n, 3] - boxes[n, 1] + 1) - + box_area - - iw * ih - ) - overlaps[n, k] = iw * ih / ua - return overlaps - - -def polys_to_mask_wrt_box(polygons, box, M): - """Convert from the COCO polygon segmentation format to a binary mask - encoded as a 2D array of data type numpy.float32. The polygon segmentation - is understood to be enclosed in the given box and rasterized to an M x M - mask. The resulting mask is therefore of shape (M, M). - """ - w = box[2] - box[0] - h = box[3] - box[1] - - w = np.maximum(w, 1) - h = np.maximum(h, 1) - - polygons_norm = [] - for poly in polygons: - p = np.array(poly, dtype=np.float32) - p[0::2] = (p[0::2] - box[0]) * M / w - p[1::2] = (p[1::2] - box[1]) * M / h - polygons_norm.append(p) - - mask = [] - for polygons in polygons_norm: - assert polygons.shape[0] % 2 == 0 - k = polygons.shape[0] // 2 - mask.append(poly2mask(polygons, k, M, M)) - mask = np.array(mask) - # Flatten in case polygons was a list - mask = np.sum(mask, axis=0) - mask = np.array(mask > 0, dtype=np.float32) - return mask - - -def expand_mask_targets(masks, mask_class_labels, resolution, num_classes): - """Expand masks from shape (#masks, resolution ** 2) - to (#masks, #classes * resolution ** 2) to encode class - specific mask targets. - """ - assert masks.shape[0] == mask_class_labels.shape[0] - - # Target values of -1 are "don't care" / ignore labels - mask_targets = -np.ones( - (masks.shape[0], num_classes * resolution**2), dtype=np.int32 - ) - for i in range(masks.shape[0]): - cls = int(mask_class_labels[i]) - start = resolution**2 * cls - end = start + resolution**2 - # Ignore background instance - # (only happens when there is no fg samples in an image) - if cls > 0: - mask_targets[i, start:end] = masks[i, :] - return mask_targets - - -def generate_mask_labels( - num_classes, - im_info, - gt_classes, - is_crowd, - label_int32, - gt_polys, - resolution, - rois, - roi_lod, - gt_lod, -): - mask_rois = [] - roi_has_mask_int32 = [] - mask_int32 = [] - new_lod = [] - for i in range(len(im_info)): - roi_s = roi_lod[i] - roi_e = roi_lod[i + 1] - gt_s = gt_lod[i] - gt_e = gt_lod[i + 1] - mask_blob = _sample_mask( - num_classes, - im_info[i], - gt_classes[gt_s:gt_e], - is_crowd[gt_s:gt_e], - label_int32[roi_s:roi_e], - gt_polys[i], - resolution, - rois[roi_s:roi_e], - ) - new_lod.append(mask_blob['mask_rois'].shape[0]) - mask_rois.append(mask_blob['mask_rois']) - roi_has_mask_int32.append(mask_blob['roi_has_mask_int32']) - mask_int32.append(mask_blob['mask_int32']) - return mask_rois, roi_has_mask_int32, mask_int32, new_lod - - -def _sample_mask( - num_classes, - im_info, - gt_classes, - is_crowd, - label_int32, - gt_polys, # [[[], []], []] - resolution, - rois, -): - mask_blob = {} - im_scale = im_info[2] - sample_boxes = rois - polys_gt_inds = np.where((gt_classes > 0) & (is_crowd == 0))[0] - polys_gt = [gt_polys[i] for i in polys_gt_inds] - boxes_from_polys = polys_to_boxes(polys_gt) - - fg_inds = np.where(label_int32 > 0)[0] - roi_has_mask = fg_inds.copy() - if fg_inds.shape[0] > 0: - mask_class_labels = label_int32[fg_inds] - masks = np.zeros((fg_inds.shape[0], resolution**2), dtype=np.int32) - rois_fg = sample_boxes[fg_inds] - overlaps_bbfg_bbpolys = bbox_overlaps( - rois_fg.astype(np.float32), boxes_from_polys.astype(np.float32) - ) - fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1) - for i in range(rois_fg.shape[0]): - fg_polys_ind = fg_polys_inds[i] - poly_gt = polys_gt[fg_polys_ind] - roi_fg = rois_fg[i] - mask = polys_to_mask_wrt_box(poly_gt, roi_fg, resolution) - mask = np.array(mask > 0, dtype=np.int32) - masks[i, :] = np.reshape(mask, resolution**2) - else: - bg_inds = np.where(label_int32 == 0)[0] - rois_fg = sample_boxes[bg_inds[0]].reshape((1, -1)) - masks = -np.ones((1, resolution**2), dtype=np.int32) - mask_class_labels = np.zeros((1,)) - roi_has_mask = np.append(roi_has_mask, 0) - masks = expand_mask_targets( - masks, mask_class_labels, resolution, num_classes - ) - rois_fg *= im_scale - mask_blob['mask_rois'] = rois_fg - mask_blob['roi_has_mask_int32'] = roi_has_mask - mask_blob['mask_int32'] = masks - return mask_blob - - -def trans_lod(lod): - new_lod = [0] - for i in range(len(lod)): - new_lod.append(lod[i] + new_lod[i]) - return new_lod - - -if __name__ == '__main__': - unittest.main() diff --git a/test/legacy_test/test_generate_proposal_labels_op.py b/test/legacy_test/test_generate_proposal_labels_op.py deleted file mode 100644 index 903201b9856a7c..00000000000000 --- a/test/legacy_test/test_generate_proposal_labels_op.py +++ /dev/null @@ -1,553 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import numpy as np -from op_test import OpTest - - -def generate_proposal_labels_in_python( - rpn_rois, - gt_classes, - is_crowd, - gt_boxes, - im_info, - batch_size_per_im, - fg_fraction, - fg_thresh, - bg_thresh_hi, - bg_thresh_lo, - bbox_reg_weights, - class_nums, - use_random, - is_cls_agnostic, - is_cascade_rcnn, - max_overlaps=None, -): - rois = [] - labels_int32 = [] - bbox_targets = [] - bbox_inside_weights = [] - bbox_outside_weights = [] - max_overlap_with_gt = [] - lod = [] - assert len(rpn_rois) == len( - im_info - ), 'batch size of rpn_rois and ground_truth is not matched' - - for im_i in range(len(im_info)): - max_overlap = max_overlaps[im_i] if is_cascade_rcnn else None - frcn_blobs = _sample_rois( - rpn_rois[im_i], - gt_classes[im_i], - is_crowd[im_i], - gt_boxes[im_i], - im_info[im_i], - batch_size_per_im, - fg_fraction, - fg_thresh, - bg_thresh_hi, - bg_thresh_lo, - bbox_reg_weights, - class_nums, - use_random, - is_cls_agnostic, - is_cascade_rcnn, - max_overlap, - ) - lod.append(frcn_blobs['rois'].shape[0]) - rois.append(frcn_blobs['rois']) - labels_int32.append(frcn_blobs['labels_int32']) - bbox_targets.append(frcn_blobs['bbox_targets']) - bbox_inside_weights.append(frcn_blobs['bbox_inside_weights']) - bbox_outside_weights.append(frcn_blobs['bbox_outside_weights']) - max_overlap_with_gt.append(frcn_blobs['max_overlap']) - - return ( - rois, - labels_int32, - bbox_targets, - bbox_inside_weights, - bbox_outside_weights, - max_overlap_with_gt, - lod, - ) - - -def filter_roi(rois, max_overlap): - ws = rois[:, 2] - rois[:, 0] + 1 - hs = rois[:, 3] - rois[:, 1] + 1 - keep = np.where((ws > 0) & (hs > 0) & (max_overlap < 1.0))[0] - if len(keep) > 0: - return rois[keep, :] - return np.zeros((1, 4)).astype('float32') - - -def _sample_rois( - rpn_rois, - gt_classes, - is_crowd, - gt_boxes, - im_info, - batch_size_per_im, - fg_fraction, - fg_thresh, - bg_thresh_hi, - bg_thresh_lo, - bbox_reg_weights, - class_nums, - use_random, - is_cls_agnostic, - is_cascade_rcnn, - max_overlap, -): - rois_per_image = int(batch_size_per_im) - fg_rois_per_im = int(np.round(fg_fraction * rois_per_image)) - - # Roidb - im_scale = im_info[2] - inv_im_scale = 1.0 / im_scale - rpn_rois = rpn_rois * inv_im_scale - - if is_cascade_rcnn: - rpn_rois = filter_roi(rpn_rois, max_overlap) - - boxes = np.vstack([gt_boxes, rpn_rois]) - - gt_overlaps = np.zeros((boxes.shape[0], class_nums)) - box_to_gt_ind_map = np.zeros((boxes.shape[0]), dtype=np.int32) - proposal_to_gt_overlaps = _bbox_overlaps(boxes, gt_boxes) - - overlaps_argmax = proposal_to_gt_overlaps.argmax(axis=1) - overlaps_max = proposal_to_gt_overlaps.max(axis=1) - # Boxes which with non-zero overlap with gt boxes - overlapped_boxes_ind = np.where(overlaps_max > 0)[0] - overlapped_boxes_gt_classes = gt_classes[ - overlaps_argmax[overlapped_boxes_ind] - ] - gt_overlaps[ - overlapped_boxes_ind, overlapped_boxes_gt_classes - ] = overlaps_max[overlapped_boxes_ind] - box_to_gt_ind_map[overlapped_boxes_ind] = overlaps_argmax[ - overlapped_boxes_ind - ] - - crowd_ind = np.where(is_crowd)[0] - gt_overlaps[crowd_ind] = -1.0 - max_overlaps = gt_overlaps.max(axis=1) - max_classes = gt_overlaps.argmax(axis=1) - - if is_cascade_rcnn: - # Cascade RCNN Decode Filter - fg_inds = np.where(max_overlaps >= fg_thresh)[0] - bg_inds = np.where( - (max_overlaps < bg_thresh_hi) & (max_overlaps >= bg_thresh_lo) - )[0] - fg_rois_per_this_image = fg_inds.shape[0] - bg_rois_per_this_image = bg_inds.shape[0] - else: - # Foreground - fg_inds = np.where(max_overlaps >= fg_thresh)[0] - fg_rois_per_this_image = np.minimum(fg_rois_per_im, fg_inds.shape[0]) - # Sample foreground if there are too many - if (fg_inds.shape[0] > fg_rois_per_this_image) and use_random: - fg_inds = np.random.choice( - fg_inds, size=fg_rois_per_this_image, replace=False - ) - fg_inds = fg_inds[:fg_rois_per_this_image] - # Background - bg_inds = np.where( - (max_overlaps < bg_thresh_hi) & (max_overlaps >= bg_thresh_lo) - )[0] - bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image - bg_rois_per_this_image = np.minimum( - bg_rois_per_this_image, bg_inds.shape[0] - ) - # Sample background if there are too many - if (bg_inds.shape[0] > bg_rois_per_this_image) and use_random: - bg_inds = np.random.choice( - bg_inds, size=bg_rois_per_this_image, replace=False - ) - bg_inds = bg_inds[:bg_rois_per_this_image] - - keep_inds = np.append(fg_inds, bg_inds) - sampled_labels = max_classes[keep_inds] - sampled_labels[fg_rois_per_this_image:] = 0 - sampled_boxes = boxes[keep_inds] - sampled_max_overlap = max_overlaps[keep_inds] - sampled_gts = gt_boxes[box_to_gt_ind_map[keep_inds]] - sampled_gts[fg_rois_per_this_image:, :] = gt_boxes[0] - bbox_label_targets = _compute_targets( - sampled_boxes, sampled_gts, sampled_labels, bbox_reg_weights - ) - bbox_targets, bbox_inside_weights = _expand_bbox_targets( - bbox_label_targets, class_nums, is_cls_agnostic - ) - bbox_outside_weights = np.array( - bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype - ) - # Scale rois - sampled_rois = sampled_boxes * im_scale - - # Faster RCNN blobs - frcn_blobs = { - 'rois': sampled_rois, - 'labels_int32': sampled_labels, - 'bbox_targets': bbox_targets, - 'bbox_inside_weights': bbox_inside_weights, - 'bbox_outside_weights': bbox_outside_weights, - 'max_overlap': sampled_max_overlap, - } - return frcn_blobs - - -def _bbox_overlaps(roi_boxes, gt_boxes): - w1 = np.maximum(roi_boxes[:, 2] - roi_boxes[:, 0] + 1, 0) - h1 = np.maximum(roi_boxes[:, 3] - roi_boxes[:, 1] + 1, 0) - w2 = np.maximum(gt_boxes[:, 2] - gt_boxes[:, 0] + 1, 0) - h2 = np.maximum(gt_boxes[:, 3] - gt_boxes[:, 1] + 1, 0) - area1 = w1 * h1 - area2 = w2 * h2 - - overlaps = np.zeros((roi_boxes.shape[0], gt_boxes.shape[0])) - for ind1 in range(roi_boxes.shape[0]): - for ind2 in range(gt_boxes.shape[0]): - inter_x1 = np.maximum(roi_boxes[ind1, 0], gt_boxes[ind2, 0]) - inter_y1 = np.maximum(roi_boxes[ind1, 1], gt_boxes[ind2, 1]) - inter_x2 = np.minimum(roi_boxes[ind1, 2], gt_boxes[ind2, 2]) - inter_y2 = np.minimum(roi_boxes[ind1, 3], gt_boxes[ind2, 3]) - inter_w = np.maximum(inter_x2 - inter_x1 + 1, 0) - inter_h = np.maximum(inter_y2 - inter_y1 + 1, 0) - inter_area = inter_w * inter_h - iou = inter_area / (area1[ind1] + area2[ind2] - inter_area) - overlaps[ind1, ind2] = iou - return overlaps - - -def _compute_targets(roi_boxes, gt_boxes, labels, bbox_reg_weights): - assert roi_boxes.shape[0] == gt_boxes.shape[0] - assert roi_boxes.shape[1] == 4 - assert gt_boxes.shape[1] == 4 - - targets = np.zeros(roi_boxes.shape) - bbox_reg_weights = np.asarray(bbox_reg_weights) - targets = _box_to_delta( - ex_boxes=roi_boxes, gt_boxes=gt_boxes, weights=bbox_reg_weights - ) - - return np.hstack([labels[:, np.newaxis], targets]).astype( - np.float32, copy=False - ) - - -def _box_to_delta(ex_boxes, gt_boxes, weights): - ex_w = ex_boxes[:, 2] - ex_boxes[:, 0] + 1 - ex_h = ex_boxes[:, 3] - ex_boxes[:, 1] + 1 - ex_ctr_x = ex_boxes[:, 0] + 0.5 * ex_w - ex_ctr_y = ex_boxes[:, 1] + 0.5 * ex_h - - gt_w = gt_boxes[:, 2] - gt_boxes[:, 0] + 1 - gt_h = gt_boxes[:, 3] - gt_boxes[:, 1] + 1 - gt_ctr_x = gt_boxes[:, 0] + 0.5 * gt_w - gt_ctr_y = gt_boxes[:, 1] + 0.5 * gt_h - - dx = (gt_ctr_x - ex_ctr_x) / ex_w / weights[0] - dy = (gt_ctr_y - ex_ctr_y) / ex_h / weights[1] - dw = (np.log(gt_w / ex_w)) / weights[2] - dh = (np.log(gt_h / ex_h)) / weights[3] - - targets = np.vstack([dx, dy, dw, dh]).transpose() - return targets - - -def _expand_bbox_targets(bbox_targets_input, class_nums, is_cls_agnostic): - class_labels = bbox_targets_input[:, 0] - fg_inds = np.where(class_labels > 0)[0] - # if is_cls_agnostic: - # class_labels = [1 if ll > 0 else 0 for ll in class_labels] - # class_labels = np.array(class_labels, dtype=np.int32) - # class_nums = 2 - bbox_targets = np.zeros( - ( - class_labels.shape[0], - 4 * class_nums if not is_cls_agnostic else 4 * 2, - ) - ) - bbox_inside_weights = np.zeros(bbox_targets.shape) - for ind in fg_inds: - class_label = int(class_labels[ind]) if not is_cls_agnostic else 1 - start_ind = class_label * 4 - end_ind = class_label * 4 + 4 - bbox_targets[ind, start_ind:end_ind] = bbox_targets_input[ind, 1:] - bbox_inside_weights[ind, start_ind:end_ind] = (1.0, 1.0, 1.0, 1.0) - return bbox_targets, bbox_inside_weights - - -class TestGenerateProposalLabelsOp(OpTest): - def set_data(self): - # self.use_random = False - self.init_use_random() - self.init_test_params() - self.init_test_input() - self.init_test_cascade() - self.init_test_output() - - self.inputs = { - 'RpnRois': (self.rpn_rois[0], self.rpn_rois_lod), - 'GtClasses': (self.gt_classes[0], self.gts_lod), - 'IsCrowd': (self.is_crowd[0], self.gts_lod), - 'GtBoxes': (self.gt_boxes[0], self.gts_lod), - 'ImInfo': self.im_info, - } - if self.max_overlaps is not None: - self.inputs['MaxOverlap'] = ( - self.max_overlaps[0], - self.rpn_rois_lod, - ) - - self.attrs = { - 'batch_size_per_im': self.batch_size_per_im, - 'fg_fraction': self.fg_fraction, - 'fg_thresh': self.fg_thresh, - 'bg_thresh_hi': self.bg_thresh_hi, - 'bg_thresh_lo': self.bg_thresh_lo, - 'bbox_reg_weights': self.bbox_reg_weights, - 'class_nums': self.class_nums, - 'use_random': self.use_random, - 'is_cls_agnostic': self.is_cls_agnostic, - 'is_cascade_rcnn': self.is_cascade_rcnn, - } - self.outputs = { - 'Rois': (self.rois, [self.lod]), - 'LabelsInt32': (self.labels_int32, [self.lod]), - 'BboxTargets': (self.bbox_targets, [self.lod]), - 'BboxInsideWeights': (self.bbox_inside_weights, [self.lod]), - 'BboxOutsideWeights': (self.bbox_outside_weights, [self.lod]), - 'MaxOverlapWithGT': (self.max_overlap_with_gt, [self.lod]), - } - - def test_check_output(self): - # NODE(yjjiang11): This op will be deprecated. - self.check_output(check_dygraph=False) - - def setUp(self): - self.op_type = 'generate_proposal_labels' - self.set_data() - - def init_test_cascade( - self, - ): - self.is_cascade_rcnn = False - self.max_overlaps = None - - def init_use_random(self): - self.use_random = False - - def init_test_params(self): - self.batch_size_per_im = 100 - self.fg_fraction = 0.25 - self.fg_thresh = 0.5 - self.bg_thresh_hi = 0.5 - self.bg_thresh_lo = 0.0 - self.bbox_reg_weights = [0.1, 0.1, 0.2, 0.2] - self.is_cls_agnostic = False - self.class_nums = 2 if self.is_cls_agnostic else 81 - - def init_test_input(self): - np.random.seed(0) - gt_nums = 6 # Keep same with batch_size_per_im for unittest - proposal_nums = 200 - images_shape = [[64, 64]] - self.im_info = np.ones((len(images_shape), 3)).astype(np.float32) - for i in range(len(images_shape)): - self.im_info[i, 0] = images_shape[i][0] - self.im_info[i, 1] = images_shape[i][1] - self.im_info[i, 2] = 0.8 # scale - - self.rpn_rois, self.rpn_rois_lod = _generate_proposals( - images_shape, proposal_nums - ) - ground_truth, self.gts_lod = _generate_groundtruth( - images_shape, self.class_nums, gt_nums - ) - - self.gt_classes = [gt['gt_classes'] for gt in ground_truth] - self.gt_boxes = [gt['boxes'] for gt in ground_truth] - self.is_crowd = [gt['is_crowd'] for gt in ground_truth] - - def init_test_output(self): - ( - self.rois, - self.labels_int32, - self.bbox_targets, - self.bbox_inside_weights, - self.bbox_outside_weights, - self.max_overlap_with_gt, - self.lod, - ) = generate_proposal_labels_in_python( - self.rpn_rois, - self.gt_classes, - self.is_crowd, - self.gt_boxes, - self.im_info, - self.batch_size_per_im, - self.fg_fraction, - self.fg_thresh, - self.bg_thresh_hi, - self.bg_thresh_lo, - self.bbox_reg_weights, - self.class_nums, - self.use_random, - self.is_cls_agnostic, - self.is_cascade_rcnn, - self.max_overlaps, - ) - self.rois = np.vstack(self.rois) - self.labels_int32 = np.hstack(self.labels_int32) - self.labels_int32 = self.labels_int32[:, np.newaxis] - self.bbox_targets = np.vstack(self.bbox_targets) - self.bbox_inside_weights = np.vstack(self.bbox_inside_weights) - self.bbox_outside_weights = np.vstack(self.bbox_outside_weights) - self.max_overlap_with_gt = np.concatenate(self.max_overlap_with_gt) - - -class TestCascade(TestGenerateProposalLabelsOp): - def init_test_cascade(self): - self.is_cascade_rcnn = True - roi_num = len(self.rpn_rois[0]) - self.max_overlaps = [] - max_overlap = np.random.rand(roi_num).astype('float32') - # Make GT samples with overlap = 1 - max_overlap[max_overlap > 0.9] = 1.0 - self.max_overlaps.append(max_overlap) - - -class TestUseRandom(TestGenerateProposalLabelsOp): - def init_use_random(self): - self.use_random = True - self.is_cascade_rcnn = False - - def test_check_output(self): - self.check_output_customized(self.verify_out) - - def verify_out(self, outs): - print("skip") - - def init_test_params(self): - self.batch_size_per_im = 512 - self.fg_fraction = 0.025 - self.fg_thresh = 0.5 - self.bg_thresh_hi = 0.5 - self.bg_thresh_lo = 0.0 - self.bbox_reg_weights = [0.1, 0.1, 0.2, 0.2] - self.is_cls_agnostic = False - self.class_nums = 2 if self.is_cls_agnostic else 81 - - -class TestClsAgnostic(TestCascade): - def init_test_params(self): - self.batch_size_per_im = 512 - self.fg_fraction = 0.25 - self.fg_thresh = 0.5 - self.bg_thresh_hi = 0.5 - self.bg_thresh_lo = 0.0 - self.bbox_reg_weights = [0.1, 0.1, 0.2, 0.2] - self.is_cls_agnostic = True - self.class_nums = 2 if self.is_cls_agnostic else 81 - - -class TestOnlyGT(TestCascade): - def init_test_input(self): - np.random.seed(0) - gt_nums = 6 # Keep same with batch_size_per_im for unittest - proposal_nums = 6 - images_shape = [[64, 64]] - self.im_info = np.ones((len(images_shape), 3)).astype(np.float32) - for i in range(len(images_shape)): - self.im_info[i, 0] = images_shape[i][0] - self.im_info[i, 1] = images_shape[i][1] - self.im_info[i, 2] = 0.8 # scale - - ground_truth, self.gts_lod = _generate_groundtruth( - images_shape, self.class_nums, gt_nums - ) - - self.gt_classes = [gt['gt_classes'] for gt in ground_truth] - self.gt_boxes = [gt['boxes'] for gt in ground_truth] - self.is_crowd = [gt['is_crowd'] for gt in ground_truth] - self.rpn_rois = self.gt_boxes - self.rpn_rois_lod = self.gts_lod - - -class TestOnlyGT2(TestCascade): - def init_test_cascade(self): - self.is_cascade_rcnn = True - roi_num = len(self.rpn_rois[0]) - self.max_overlaps = [] - max_overlap = np.ones(roi_num).astype('float32') - self.max_overlaps.append(max_overlap) - - -def _generate_proposals(images_shape, proposal_nums): - rpn_rois = [] - rpn_rois_lod = [] - num_proposals = 0 - for i, image_shape in enumerate(images_shape): - proposals = _generate_boxes(image_shape, proposal_nums) - rpn_rois.append(proposals) - num_proposals = len(proposals) - rpn_rois_lod.append(num_proposals) - return rpn_rois, [rpn_rois_lod] - - -def _generate_groundtruth(images_shape, class_nums, gt_nums): - ground_truth = [] - gts_lod = [] - num_gts = 0 - for i, image_shape in enumerate(images_shape): - # Avoid background - gt_classes = np.random.randint( - low=1, high=class_nums, size=gt_nums - ).astype(np.int32) - gt_boxes = _generate_boxes(image_shape, gt_nums) - is_crowd = np.zeros((gt_nums), dtype=np.int32) - is_crowd[0] = 1 - ground_truth.append( - {'gt_classes': gt_classes, 'boxes': gt_boxes, 'is_crowd': is_crowd} - ) - num_gts += len(gt_classes) - gts_lod.append(num_gts) - return ground_truth, [gts_lod] - - -def _generate_boxes(image_size, box_nums): - width = image_size[0] - height = image_size[1] - xywh = np.random.rand(box_nums, 4) - xy1 = xywh[:, [0, 1]] * image_size - wh = xywh[:, [2, 3]] * (image_size - xy1) - xy2 = xy1 + wh - boxes = np.hstack([xy1, xy2]) - boxes[:, [0, 2]] = np.minimum( - width - 1.0, np.maximum(0.0, boxes[:, [0, 2]]) - ) - boxes[:, [1, 3]] = np.minimum( - height - 1.0, np.maximum(0.0, boxes[:, [1, 3]]) - ) - return boxes.astype(np.float32) - - -if __name__ == '__main__': - unittest.main()