diff --git a/paddle/fluid/operators/detection/CMakeLists.txt b/paddle/fluid/operators/detection/CMakeLists.txt
index 2d7729b722ddb8..d5f2c6d7448d8f 100644
--- a/paddle/fluid/operators/detection/CMakeLists.txt
+++ b/paddle/fluid/operators/detection/CMakeLists.txt
@@ -28,18 +28,11 @@ function(detection_library TARGET_NAME)
       PARENT_SCOPE)
 endfunction()
 
-detection_library(density_prior_box_op SRCS density_prior_box_op.cc
-                  density_prior_box_op.cu)
-
 detection_library(bipartite_match_op SRCS bipartite_match_op.cc)
 detection_library(anchor_generator_op SRCS anchor_generator_op.cc
                   anchor_generator_op.cu)
-detection_library(generate_proposal_labels_op SRCS
-                  generate_proposal_labels_op.cc)
 detection_library(multiclass_nms_op SRCS multiclass_nms_op.cc DEPS phi common)
 detection_library(box_clip_op SRCS box_clip_op.cc box_clip_op.cu)
-detection_library(box_decoder_and_assign_op SRCS box_decoder_and_assign_op.cc
-                  box_decoder_and_assign_op.cu)
 
 if(WITH_GPU OR WITH_ROCM)
   if(WITH_GPU)
@@ -62,8 +55,3 @@ endif()
 
 #Export local libraries to parent
 # set(DETECTION_LIBRARY ${LOCAL_DETECTION_LIBS} PARENT_SCOPE)
-
-cc_library(mask_util SRCS mask_util.cc)
-
-detection_library(generate_mask_labels_op SRCS generate_mask_labels_op.cc DEPS
-                  mask_util)
diff --git a/paddle/fluid/operators/detection/box_decoder_and_assign_op.cc b/paddle/fluid/operators/detection/box_decoder_and_assign_op.cc
deleted file mode 100644
index a7b9ad490b56cf..00000000000000
--- a/paddle/fluid/operators/detection/box_decoder_and_assign_op.cc
+++ /dev/null
@@ -1,235 +0,0 @@
-/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/operators/detection/box_decoder_and_assign_op.h"
-
-namespace paddle {
-namespace operators {
-
-class BoxDecoderAndAssignOp : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-
- protected:
-  void InferShape(framework::InferShapeContext *ctx) const override {
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInput("PriorBox"),
-        true,
-        phi::errors::NotFound("Input(PriorBox) of BoxDecoderAndAssignOp "
-                              "is not found."));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInput("PriorBoxVar"),
-        true,
-        phi::errors::NotFound("Input(PriorBoxVar) of BoxDecoderAndAssignOp"
-                              " is not found."));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInput("TargetBox"),
-        true,
-        phi::errors::NotFound("Input(TargetBox) of BoxDecoderAndAssignOp "
-                              "is not found."));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInput("BoxScore"),
-        true,
-        phi::errors::NotFound("Input(BoxScore) of BoxDecoderAndAssignOp "
-                              "is not found."));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasOutput("DecodeBox"),
-        true,
-        phi::errors::NotFound("Output(DecodeBox) of BoxDecoderAndAssignOp"
-                              " is not found."));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasOutput("OutputAssignBox"),
-        true,
-        phi::errors::NotFound("Output(OutputAssignBox) of "
-                              "BoxDecoderAndAssignOp is not found."));
-
-    auto prior_box_dims = ctx->GetInputDim("PriorBox");
-    auto prior_box_var_dims = ctx->GetInputDim("PriorBoxVar");
-    auto target_box_dims = ctx->GetInputDim("TargetBox");
-    auto box_score_dims = ctx->GetInputDim("BoxScore");
-
-    PADDLE_ENFORCE_EQ(
-        prior_box_dims.size(),
-        2,
-        phi::errors::InvalidArgument("The rank of Input of PriorBox must"
-                                     " be 2. But received rank = %d",
-                                     prior_box_dims.size()));
-    PADDLE_ENFORCE_EQ(
-        prior_box_dims[1],
-        4,
-        phi::errors::InvalidArgument(
-            "The shape of PriorBox is [N, 4], "
-            "and the second dimension must be 4. But received dimension = %d",
-            prior_box_dims[1]));
-    PADDLE_ENFORCE_EQ(
-        prior_box_var_dims.size(),
-        1,
-        phi::errors::InvalidArgument("The rank of Input of PriorBoxVar "
-                                     "must be 1. But received rank = %d",
-                                     prior_box_var_dims.size()));
-    PADDLE_ENFORCE_EQ(
-        prior_box_var_dims[0],
-        4,
-        phi::errors::InvalidArgument("The shape of PriorBoxVar is [4]. "
-                                     "But received dimension = %d",
-                                     prior_box_var_dims[0]));
-    PADDLE_ENFORCE_EQ(
-        target_box_dims.size(),
-        2,
-        phi::errors::InvalidArgument("The rank of Input of TargetBox must "
-                                     "be 2. But received rank = %d",
-                                     target_box_dims.size()));
-    PADDLE_ENFORCE_EQ(
-        box_score_dims.size(),
-        2,
-        phi::errors::InvalidArgument("The rank of Input of BoxScore must "
-                                     "be 2. But received rank = %d",
-                                     box_score_dims.size()));
-    if (ctx->IsRuntime()) {
-      PADDLE_ENFORCE_EQ(
-          prior_box_dims[0],
-          target_box_dims[0],
-          phi::errors::InvalidArgument(
-              "The first dimension of prior_box and "
-              "target_box is the number of box and should be same. But "
-              "received dimension of prior_box is %d, dimension of target_box "
-              "is %d",
-              prior_box_dims[0],
-              target_box_dims[0]));
-      PADDLE_ENFORCE_EQ(
-          prior_box_dims[0],
-          box_score_dims[0],
-          phi::errors::InvalidArgument(
-              "The first dimension of prior_box and "
-              "box_score is the number of box and should be same. But received "
-              "dimension of prior_box is %d, dimension of box_score is %d",
-              prior_box_dims[0],
-              box_score_dims[0]));
-      PADDLE_ENFORCE_EQ(
-          target_box_dims[1],
-          box_score_dims[1] * prior_box_dims[1],
-          phi::errors::InvalidArgument(
-              "The shape of target_box is "
-              "[N, classnum * 4], The shape of box_score is [N, classnum], "
-              "The shape of prior_box is [N, 4]. But received second dimension "
-              "of "
-              "target_box is %d, second dimension of box_score_dims is %d, "
-              "and second dimension of prior_box_dims is %d",
-              target_box_dims[1],
-              box_score_dims[1],
-              prior_box_dims[1]));
-    }
-    ctx->SetOutputDim(
-        "DecodeBox",
-        common::make_ddim({target_box_dims[0], target_box_dims[1]}));
-    ctx->ShareLoD("TargetBox", /*->*/ "DecodeBox");
-    ctx->SetOutputDim(
-        "OutputAssignBox",
-        common::make_ddim({prior_box_dims[0], prior_box_dims[1]}));
-    ctx->ShareLoD("PriorBox", /*->*/ "OutputAssignBox");
-  }
-};
-
-class BoxDecoderAndAssignOpMaker : public framework::OpProtoAndCheckerMaker {
- public:
-  void Make() override {
-    AddInput(
-        "PriorBox",
-        "(Tensor, default Tensor<float>) "
-        "Box list PriorBox is a 2-D Tensor with shape [N, 4] which holds N "
-        "boxes and each box is represented as [xmin, ymin, xmax, ymax], "
-        "[xmin, ymin] is the left top coordinate of the anchor box, "
-        "if the input is image feature map, they are close to the origin "
-        "of the coordinate system. [xmax, ymax] is the right bottom "
-        "coordinate of the anchor box.");
-    AddInput("PriorBoxVar",
-             "(Tensor, default Tensor<float>, optional) "
-             "PriorBoxVar is a 2-D Tensor with shape [N, 4] which holds N "
-             "group of variance. PriorBoxVar will set all elements to 1 by "
-             "default.")
-        .AsDispensable();
-    AddInput("TargetBox",
-             "(phi::DenseTensor or Tensor) "
-             "This input can be a 2-D phi::DenseTensor with shape "
-             "[N, classnum*4]. It holds N targets for N boxes.");
-    AddInput("BoxScore",
-             "(phi::DenseTensor or Tensor) "
-             "This input can be a 2-D phi::DenseTensor with shape "
-             "[N, classnum], each box is represented as [classnum] which is "
-             "the classification probabilities.");
-    AddAttr<float>("box_clip",
-                   "(float, default 4.135, np.log(1000. / 16.)) "
-                   "clip box to prevent overflowing")
-        .SetDefault(4.135f);
-    AddOutput("DecodeBox",
-              "(phi::DenseTensor or Tensor) "
-              "the output tensor of op with shape [N, classnum * 4] "
-              "representing the result of N target boxes decoded with "
-              "M Prior boxes and variances for each class.");
-    AddOutput("OutputAssignBox",
-              "(phi::DenseTensor or Tensor) "
-              "the output tensor of op with shape [N, 4] "
-              "representing the result of N target boxes decoded with "
-              "M Prior boxes and variances with the best non-background class "
-              "by BoxScore.");
-    AddComment(R"DOC(
-
-Bounding Box Coder.
-
-Decode the target bounding box with the prior_box information.
-
-The Decoding schema is described below:
-
-    $$
-    ox = (pw \\times pxv \\times tx + px) - \\frac{tw}{2}
-    $$
-    $$
-    oy = (ph \\times pyv \\times ty + py) - \\frac{th}{2}
-    $$
-    $$
-    ow = \\exp (pwv \\times tw) \\times pw + \\frac{tw}{2}
-    $$
-    $$
-    oh = \\exp (phv \\times th) \\times ph + \\frac{th}{2}
-    $$
-
-where `tx`, `ty`, `tw`, `th` denote the target box's center coordinates, width
-and height respectively. Similarly, `px`, `py`, `pw`, `ph` denote the
-prior_box's (anchor) center coordinates, width and height. `pxv`, `pyv`, `pwv`,
-`phv` denote the variance of the prior_box and `ox`, `oy`, `ow`, `oh` denote the
-decoded coordinates, width and height in decode_box.
-
-decode_box is obtained after box decode, then assigning schema is described below:
-
-For each prior_box, use the best non-background class's decoded values to
-update the prior_box locations and get output_assign_box. So, the shape of
-output_assign_box is the same as PriorBox.
-)DOC");
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-REGISTER_OPERATOR(
-    box_decoder_and_assign,
-    ops::BoxDecoderAndAssignOp,
-    ops::BoxDecoderAndAssignOpMaker,
-    paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
-    paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
-
-PD_REGISTER_STRUCT_KERNEL(box_decoder_and_assign,
-                          CPU,
-                          ALL_LAYOUT,
-                          ops::BoxDecoderAndAssignKernel,
-                          float,
-                          double) {}
diff --git a/paddle/fluid/operators/detection/box_decoder_and_assign_op.cu b/paddle/fluid/operators/detection/box_decoder_and_assign_op.cu
deleted file mode 100644
index a956a58ac75f7c..00000000000000
--- a/paddle/fluid/operators/detection/box_decoder_and_assign_op.cu
+++ /dev/null
@@ -1,159 +0,0 @@
-/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/operators/detection/box_decoder_and_assign_op.h"
-#include "paddle/fluid/memory/memcpy.h"
-#include "paddle/phi/backends/gpu/gpu_primitives.h"
-
-namespace paddle {
-namespace operators {
-
-template <typename T>
-__global__ void DecodeBoxKernel(const T* prior_box_data,
-                                const T* prior_box_var_data,
-                                const T* target_box_data,
-                                const int roi_num,
-                                const int class_num,
-                                const T box_clip,
-                                T* output_box_data) {
-  const int idx = threadIdx.x + blockIdx.x * blockDim.x;
-  if (idx < roi_num * class_num) {
-    int i = idx / class_num;
-    int j = idx % class_num;
-    T prior_box_width = prior_box_data[i * 4 + 2] - prior_box_data[i * 4] + 1;
-    T prior_box_height =
-        prior_box_data[i * 4 + 3] - prior_box_data[i * 4 + 1] + 1;
-    T prior_box_center_x = prior_box_data[i * 4] + prior_box_width / 2;
-    T prior_box_center_y = prior_box_data[i * 4 + 1] + prior_box_height / 2;
-
-    int offset = i * class_num * 4 + j * 4;
-    T dw = prior_box_var_data[2] * target_box_data[offset + 2];
-    T dh = prior_box_var_data[3] * target_box_data[offset + 3];
-    if (dw > box_clip) {
-      dw = box_clip;
-    }
-    if (dh > box_clip) {
-      dh = box_clip;
-    }
-    T target_box_center_x = 0, target_box_center_y = 0;
-    T target_box_width = 0, target_box_height = 0;
-    target_box_center_x =
-        prior_box_var_data[0] * target_box_data[offset] * prior_box_width +
-        prior_box_center_x;
-    target_box_center_y =
-        prior_box_var_data[1] * target_box_data[offset + 1] * prior_box_height +
-        prior_box_center_y;
-    target_box_width = expf(dw) * prior_box_width;
-    target_box_height = expf(dh) * prior_box_height;
-
-    output_box_data[offset] = target_box_center_x - target_box_width / 2;
-    output_box_data[offset + 1] = target_box_center_y - target_box_height / 2;
-    output_box_data[offset + 2] =
-        target_box_center_x + target_box_width / 2 - 1;
-    output_box_data[offset + 3] =
-        target_box_center_y + target_box_height / 2 - 1;
-  }
-}
-
-template <typename T>
-__global__ void AssignBoxKernel(const T* prior_box_data,
-                                const T* box_score_data,
-                                T* output_box_data,
-                                const int roi_num,
-                                const int class_num,
-                                T* output_assign_box_data) {
-  const int idx = threadIdx.x + blockIdx.x * blockDim.x;
-  if (idx < roi_num) {
-    int i = idx;
-    T max_score = -1;
-    int max_j = -1;
-    for (int j = 0; j < class_num; ++j) {
-      T score = box_score_data[i * class_num + j];
-      if (score > max_score && j > 0) {
-        max_score = score;
-        max_j = j;
-      }
-    }
-    if (max_j > 0) {
-      for (int pno = 0; pno < 4; pno++) {
-        output_assign_box_data[i * 4 + pno] =
-            output_box_data[i * class_num * 4 + max_j * 4 + pno];
-      }
-    } else {
-      for (int pno = 0; pno < 4; pno++) {
-        output_assign_box_data[i * 4 + pno] = prior_box_data[i * 4 + pno];
-      }
-    }
-  }
-}
-
-template <typename T, typename DeviceContext>
-class BoxDecoderAndAssignCUDAKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& context) const override {
-    auto* prior_box = context.Input<phi::DenseTensor>("PriorBox");
-    auto* prior_box_var = context.Input<phi::DenseTensor>("PriorBoxVar");
-    auto* target_box = context.Input<phi::DenseTensor>("TargetBox");
-    auto* box_score = context.Input<phi::DenseTensor>("BoxScore");
-    auto* output_box = context.Output<phi::DenseTensor>("DecodeBox");
-    auto* output_assign_box =
-        context.Output<phi::DenseTensor>("OutputAssignBox");
-
-    auto roi_num = target_box->dims()[0];
-    auto class_num = box_score->dims()[1];
-    auto* target_box_data = target_box->data<T>();
-    auto* prior_box_data = prior_box->data<T>();
-    auto* prior_box_var_data = prior_box_var->data<T>();
-    auto* box_score_data = box_score->data<T>();
-    output_box->mutable_data<T>({roi_num, class_num * 4}, context.GetPlace());
-    output_assign_box->mutable_data<T>({roi_num, 4}, context.GetPlace());
-    T* output_box_data = output_box->data<T>();
-    T* output_assign_box_data = output_assign_box->data<T>();
-
-    int block = 512;
-    int grid = (roi_num * class_num + block - 1) / block;
-    auto& device_ctx = context.cuda_device_context();
-
-    const T box_clip = static_cast<T>(context.Attr<float>("box_clip"));
-
-    DecodeBoxKernel<T>
-        <<<grid, block, 0, device_ctx.stream()>>>(prior_box_data,
-                                                  prior_box_var_data,
-                                                  target_box_data,
-                                                  roi_num,
-                                                  class_num,
-                                                  box_clip,
-                                                  output_box_data);
-
-    context.device_context().Wait();
-    int assign_grid = (roi_num + block - 1) / block;
-    AssignBoxKernel<T><<<assign_grid, block, 0, device_ctx.stream()>>>(
-        prior_box_data,
-        box_score_data,
-        output_box_data,
-        roi_num,
-        class_num,
-        output_assign_box_data);
-    context.device_context().Wait();
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-
-PD_REGISTER_STRUCT_KERNEL(box_decoder_and_assign,
-                          GPU,
-                          ALL_LAYOUT,
-                          ops::BoxDecoderAndAssignCUDAKernel,
-                          float,
-                          double) {}
diff --git a/paddle/fluid/operators/detection/box_decoder_and_assign_op.h b/paddle/fluid/operators/detection/box_decoder_and_assign_op.h
deleted file mode 100644
index 5a191ffaf44746..00000000000000
--- a/paddle/fluid/operators/detection/box_decoder_and_assign_op.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-#include <algorithm>
-#include <string>
-#include <vector>
-
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/phi/kernels/funcs/math_function.h"
-
-namespace paddle {
-namespace operators {
-
-template <typename T, typename DeviceContext>
-class BoxDecoderAndAssignKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& context) const override {
-    auto* prior_box = context.Input<phi::DenseTensor>("PriorBox");
-    auto* prior_box_var = context.Input<phi::DenseTensor>("PriorBoxVar");
-    auto* target_box = context.Input<phi::DenseTensor>("TargetBox");
-    auto* box_score = context.Input<phi::DenseTensor>("BoxScore");
-    auto* output_box = context.Output<phi::DenseTensor>("DecodeBox");
-    auto* output_assign_box =
-        context.Output<phi::DenseTensor>("OutputAssignBox");
-    int roi_num = target_box->dims()[0];
-    int class_num = box_score->dims()[1];
-    auto* target_box_data = target_box->data<T>();
-    auto* prior_box_data = prior_box->data<T>();
-    auto* prior_box_var_data = prior_box_var->data<T>();
-    auto* box_score_data = box_score->data<T>();
-    output_box->mutable_data<T>({roi_num, class_num * 4}, context.GetPlace());
-    output_assign_box->mutable_data<T>({roi_num, 4}, context.GetPlace());
-    T* output_box_data = output_box->data<T>();
-    T* output_assign_box_data = output_assign_box->data<T>();
-    const T bbox_clip = static_cast<T>(context.Attr<float>("box_clip"));
-
-    for (int i = 0; i < roi_num; ++i) {
-      T prior_box_width = prior_box_data[i * 4 + 2] - prior_box_data[i * 4] + 1;
-      T prior_box_height =
-          prior_box_data[i * 4 + 3] - prior_box_data[i * 4 + 1] + 1;
-      T prior_box_center_x = prior_box_data[i * 4] + prior_box_width / 2;
-      T prior_box_center_y = prior_box_data[i * 4 + 1] + prior_box_height / 2;
-      for (int j = 0; j < class_num; ++j) {
-        int64_t offset = i * class_num * 4 + j * 4;
-        T dw = std::min(prior_box_var_data[2] * target_box_data[offset + 2],
-                        bbox_clip);
-        T dh = std::min(prior_box_var_data[3] * target_box_data[offset + 3],
-                        bbox_clip);
-        T target_box_center_x = 0, target_box_center_y = 0;
-        T target_box_width = 0, target_box_height = 0;
-        target_box_center_x =
-            prior_box_var_data[0] * target_box_data[offset] * prior_box_width +
-            prior_box_center_x;
-        target_box_center_y = prior_box_var_data[1] *
-                                  target_box_data[offset + 1] *
-                                  prior_box_height +
-                              prior_box_center_y;
-        target_box_width = std::exp(dw) * prior_box_width;
-        target_box_height = std::exp(dh) * prior_box_height;
-
-        output_box_data[offset] = target_box_center_x - target_box_width / 2;
-        output_box_data[offset + 1] =
-            target_box_center_y - target_box_height / 2;
-        output_box_data[offset + 2] =
-            target_box_center_x + target_box_width / 2 - 1;
-        output_box_data[offset + 3] =
-            target_box_center_y + target_box_height / 2 - 1;
-      }
-
-      T max_score = -1;
-      int max_j = -1;
-      for (int j = 0; j < class_num; ++j) {
-        T score = box_score_data[i * class_num + j];
-        if (score > max_score && j > 0) {
-          max_score = score;
-          max_j = j;
-        }
-      }
-
-      if (max_j > 0) {
-        for (int pno = 0; pno < 4; pno++) {
-          output_assign_box_data[i * 4 + pno] =
-              output_box_data[i * class_num * 4 + max_j * 4 + pno];
-        }
-      } else {
-        for (int pno = 0; pno < 4; pno++) {
-          output_assign_box_data[i * 4 + pno] = prior_box_data[i * 4 + pno];
-        }
-      }
-    }
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
diff --git a/paddle/fluid/operators/detection/density_prior_box_op.cc b/paddle/fluid/operators/detection/density_prior_box_op.cc
deleted file mode 100644
index 4a533615aab158..00000000000000
--- a/paddle/fluid/operators/detection/density_prior_box_op.cc
+++ /dev/null
@@ -1,279 +0,0 @@
-/*Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/operators/detection/density_prior_box_op.h"
-
-namespace paddle {
-namespace operators {
-
-class DensityPriorBoxOp : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    OP_INOUT_CHECK(
-        ctx->HasInput("Input"), "Input", "Input", "DensityPriorBoxOp");
-    OP_INOUT_CHECK(
-        ctx->HasInput("Image"), "Input", "Image", "DensityPriorBoxOp");
-
-    auto image_dims = ctx->GetInputDim("Image");
-    auto input_dims = ctx->GetInputDim("Input");
-    PADDLE_ENFORCE_EQ(
-        image_dims.size(),
-        4,
-        phi::errors::InvalidArgument(
-            "The Input(Image) of Op(density_prior_box) should be a 4-D Tensor "
-            "and data format is NCHW. But received Image's dimensions = %d, "
-            "shape = [%s].",
-            image_dims.size(),
-            image_dims));
-    PADDLE_ENFORCE_EQ(
-        input_dims.size(),
-        4,
-        phi::errors::InvalidArgument(
-            "The Input(Input) of Op(density_prior_box) should be a 4-D Tensor "
-            "and data format is NCHW. But received Input's dimensions = %d, "
-            "shape = [%s].",
-            input_dims.size(),
-            input_dims));
-
-    if (ctx->IsRuntime()) {
-      PADDLE_ENFORCE_LT(
-          input_dims[2],
-          image_dims[2],
-          phi::errors::InvalidArgument(
-              "The input tensor Input's height"
-              "of DensityPriorBoxOp should be smaller than input tensor Image's"
-              "height. But received Input's height = %d, Image's height = %d",
-              input_dims[2],
-              image_dims[2]));
-
-      PADDLE_ENFORCE_LT(
-          input_dims[3],
-          image_dims[3],
-          phi::errors::InvalidArgument(
-              "The input tensor Input's width"
-              "of DensityPriorBoxOp should be smaller than input tensor Image's"
-              "width. But received Input's width = %d, Image's width = %d",
-              input_dims[3],
-              image_dims[3]));
-    }
-    auto variances = ctx->Attrs().Get<std::vector<float>>("variances");
-
-    auto fixed_sizes = ctx->Attrs().Get<std::vector<float>>("fixed_sizes");
-    auto fixed_ratios = ctx->Attrs().Get<std::vector<float>>("fixed_ratios");
-    auto densities = ctx->Attrs().Get<std::vector<int>>("densities");
-    bool flatten = ctx->Attrs().Get<bool>("flatten_to_2d");
-
-    PADDLE_ENFORCE_EQ(
-        fixed_sizes.size(),
-        densities.size(),
-        phi::errors::InvalidArgument(
-            "The length of fixed_sizes and densities must be equal. "
-            "But received: fixed_sizes's length is %d, densities's length "
-            "is %d",
-            fixed_sizes.size(),
-            densities.size()));
-    size_t num_priors = 0;
-    for (auto density : densities) {
-      num_priors += (fixed_ratios.size()) * (pow(density, 2));  // NOLINT
-    }
-    if (!flatten) {
-      std::vector<int64_t> dim_vec(4);
-      dim_vec[0] = input_dims[2];
-      dim_vec[1] = input_dims[3];
-      dim_vec[2] = static_cast<int64_t>(num_priors);
-      dim_vec[3] = 4;
-      ctx->SetOutputDim("Boxes", common::make_ddim(dim_vec));
-      ctx->SetOutputDim("Variances", common::make_ddim(dim_vec));
-    } else if (ctx->IsRuntime()) {
-      int64_t dim0 =
-          static_cast<int64_t>(input_dims[2] * input_dims[3] * num_priors);
-      ctx->SetOutputDim("Boxes", {dim0, 4});
-      ctx->SetOutputDim("Variances", {dim0, 4});
-    } else {
-      ctx->SetOutputDim("Boxes", {-1, 4});
-      ctx->SetOutputDim("Variances", {-1, 4});
-    }
-  }
-
- protected:
-  phi::KernelKey GetExpectedKernelType(
-      const framework::ExecutionContext& ctx) const override {
-    return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "Input"),
-                          ctx.GetPlace());
-  }
-};
-
-class DensityPriorBoxOpMaker : public framework::OpProtoAndCheckerMaker {
- public:
-  void Make() override {
-    AddInput(
-        "Input",
-        "(Tensor, default Tensor<float>), "
-        "the input feature data of DensityPriorBoxOp, the layout is NCHW.");
-    AddInput("Image",
-             "(Tensor, default Tensor<float>), "
-             "the input image data of DensityPriorBoxOp, the layout is NCHW.");
-    AddOutput("Boxes",
-              "(Tensor, default Tensor<float>), the output prior boxes of "
-              "DensityPriorBoxOp. The layout is [H, W, num_priors, 4]. "
-              "H is the height of input, W is the width of input, num_priors "
-              "is the box count of each position.");
-    AddOutput("Variances",
-              "(Tensor, default Tensor<float>), the expanded variances of "
-              "DensityPriorBoxOp. The layout is [H, W, num_priors, 4]. "
-              "H is the height of input, W is the width of input, num_priors "
-              "is the box count of each position.");
-    AddAttr<std::vector<float>>("variances",
-                                "(vector<float>) List of variances to be "
-                                "encoded in density prior boxes.")
-        .AddCustomChecker([](const std::vector<float>& variances) {
-          PADDLE_ENFORCE_EQ(variances.size(),
-                            4,
-                            phi::errors::InvalidArgument(
-                                "The length of variance must "
-                                "be 4. But received: variances' length is %d.",
-                                variances.size()));
-          for (size_t i = 0; i < variances.size(); ++i) {
-            PADDLE_ENFORCE_GT(variances[i],
-                              0.0,
-                              phi::errors::OutOfRange(
-                                  "variance[%d] must be greater "
-                                  "than 0. But received: variance[%d] = %f",
-                                  i,
-                                  i,
-                                  variances[i]));
-          }
-        });
-    AddAttr<bool>("clip", "(bool) Whether to clip out-of-boundary boxes.")
-        .SetDefault(true);
-    AddAttr<bool>("flatten_to_2d",
-                  "(bool) Whether to flatten to 2D and "
-                  "the second dim is 4.")
-        .SetDefault(false);
-    AddAttr<float>(
-        "step_w",
-        "Density prior boxes step across width, 0.0 for auto calculation.")
-        .SetDefault(0.0)
-        .AddCustomChecker([](const float& step_w) {
-          PADDLE_ENFORCE_GE(
-              step_w,
-              0.0,
-              phi::errors::InvalidArgument("step_w should be larger "
-                                           "than 0. But received: step_w = %f.",
-                                           step_w));
-        });
-    AddAttr<float>(
-        "step_h",
-        "Density prior boxes step across height, 0.0 for auto calculation.")
-        .SetDefault(0.0)
-        .AddCustomChecker([](const float& step_h) {
-          PADDLE_ENFORCE_GE(
-              step_h,
-              0.0,
-              phi::errors::InvalidArgument("step_h should be larger "
-                                           "than 0. But received: step_h = %f.",
-                                           step_h));
-        });
-
-    AddAttr<float>("offset",
-                   "(float) "
-                   "Density prior boxes center offset.")
-        .SetDefault(0.5);
-    AddAttr<std::vector<float>>("fixed_sizes",
-                                "(vector<float>) List of fixed sizes "
-                                "of generated density prior boxes.")
-        .SetDefault(std::vector<float>{})
-        .AddCustomChecker([](const std::vector<float>& fixed_sizes) {
-          for (size_t i = 0; i < fixed_sizes.size(); ++i) {
-            PADDLE_ENFORCE_GT(
-                fixed_sizes[i],
-                0.0,
-                phi::errors::OutOfRange(
-                    "fixed_sizes[%d] should be "
-                    "larger than 0. But received: fixed_sizes[%d] = %f",
-                    i,
-                    i,
-                    fixed_sizes[i]));
-          }
-        });
-
-    AddAttr<std::vector<float>>("fixed_ratios",
-                                "(vector<float>) List of fixed ratios "
-                                "of generated density prior boxes.")
-        .SetDefault(std::vector<float>{})
-        .AddCustomChecker([](const std::vector<float>& fixed_ratios) {
-          for (size_t i = 0; i < fixed_ratios.size(); ++i) {
-            PADDLE_ENFORCE_GT(
-                fixed_ratios[i],
-                0.0,
-                phi::errors::OutOfRange(
-                    "fixed_ratios[%d] should be "
-                    "larger than 0. But received: fixed_ratios[%d] = %f",
-                    i,
-                    i,
-                    fixed_ratios[i]));
-          }
-        });
-
-    AddAttr<std::vector<int>>("densities",
-                              "(vector<float>) List of densities "
-                              "of generated density prior boxes.")
-        .SetDefault(std::vector<int>{})
-        .AddCustomChecker([](const std::vector<int>& densities) {
-          for (size_t i = 0; i < densities.size(); ++i) {
-            PADDLE_ENFORCE_GT(
-                densities[i],
-                0,
-                phi::errors::OutOfRange(
-                    "densities[%d] should be "
-                    "larger than 0. But received: densities[%d] = %f.",
-                    i,
-                    i,
-                    densities[i]));
-          }
-        });
-    AddComment(R"DOC(
-        Density Prior box operator
-        Each position of the input produce N density prior boxes, N is determined by
-        the count of fixed_ratios, densities, the calculation of N is as follows:
-        for density in densities:
-        N += size(fixed_ratios)*density^2
-        )DOC");
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-REGISTER_OPERATOR(
-    density_prior_box,
-    ops::DensityPriorBoxOp,
-    ops::DensityPriorBoxOpMaker,
-    paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
-    paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
-
-PD_REGISTER_STRUCT_KERNEL(density_prior_box,
-                          CPU,
-                          ALL_LAYOUT,
-                          ops::DensityPriorBoxOpKernel,
-                          float,
-                          double) {}
-
-REGISTER_OP_KERNEL(prior_box,
-                   MKLDNN,
-                   ::paddle::platform::CPUPlace,
-                   ops::PriorBoxOpKernel<float>,
-                   ops::PriorBoxOpKernel<double>,
-                   ops::PriorBoxOpKernel<uint8_t>,
-                   ops::PriorBoxOpKernel<int8_t>);
diff --git a/paddle/fluid/operators/detection/density_prior_box_op.cu b/paddle/fluid/operators/detection/density_prior_box_op.cu
deleted file mode 100644
index 016b2e0bc93529..00000000000000
--- a/paddle/fluid/operators/detection/density_prior_box_op.cu
+++ /dev/null
@@ -1,197 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/operators/detection/density_prior_box_op.h"
-
-namespace paddle {
-namespace operators {
-
-template <typename T>
-static __device__ inline T Clip(T in) {
-  return min(max(in, 0.), 1.);
-}
-
-template <typename T>
-static __global__ void GenDensityPriorBox(const int height,
-                                          const int width,
-                                          const int im_height,
-                                          const int im_width,
-                                          const T offset,
-                                          const T step_width,
-                                          const T step_height,
-                                          const int num_priors,
-                                          const T* ratios_shift,
-                                          bool is_clip,
-                                          const T var_xmin,
-                                          const T var_ymin,
-                                          const T var_xmax,
-                                          const T var_ymax,
-                                          T* out,
-                                          T* var) {
-  int gidx = blockIdx.x * blockDim.x + threadIdx.x;
-  int gidy = blockIdx.y * blockDim.y + threadIdx.y;
-  int step_x = blockDim.x * gridDim.x;
-  int step_y = blockDim.y * gridDim.y;
-
-  const T* width_ratio = ratios_shift;
-  const T* height_ratio = ratios_shift + num_priors;
-  const T* width_shift = ratios_shift + 2 * num_priors;
-  const T* height_shift = ratios_shift + 3 * num_priors;
-
-  for (int j = gidy; j < height; j += step_y) {
-    for (int i = gidx; i < width * num_priors; i += step_x) {
-      int h = j;
-      int w = i / num_priors;
-      int k = i % num_priors;
-
-      T center_x = (w + offset) * step_width;
-      T center_y = (h + offset) * step_height;
-
-      T center_x_temp = center_x + width_shift[k];
-      T center_y_temp = center_y + height_shift[k];
-
-      T box_width_ratio = width_ratio[k] / 2.;
-      T box_height_ratio = height_ratio[k] / 2.;
-
-      T xmin = max((center_x_temp - box_width_ratio) / im_width, 0.);
-      T ymin = max((center_y_temp - box_height_ratio) / im_height, 0.);
-      T xmax = min((center_x_temp + box_width_ratio) / im_width, 1.);
-      T ymax = min((center_y_temp + box_height_ratio) / im_height, 1.);
-
-      int out_offset = (j * width * num_priors + i) * 4;
-      out[out_offset] = is_clip ? Clip<T>(xmin) : xmin;
-      out[out_offset + 1] = is_clip ? Clip<T>(ymin) : ymin;
-      out[out_offset + 2] = is_clip ? Clip<T>(xmax) : xmax;
-      out[out_offset + 3] = is_clip ? Clip<T>(ymax) : ymax;
-
-      var[out_offset] = var_xmin;
-      var[out_offset + 1] = var_ymin;
-      var[out_offset + 2] = var_xmax;
-      var[out_offset + 3] = var_ymax;
-    }
-  }
-}
-
-template <typename T, typename DeviceContext>
-class DensityPriorBoxOpCUDAKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* input = ctx.Input<phi::DenseTensor>("Input");
-    auto* image = ctx.Input<phi::DenseTensor>("Image");
-    auto* boxes = ctx.Output<phi::DenseTensor>("Boxes");
-    auto* vars = ctx.Output<phi::DenseTensor>("Variances");
-
-    auto variances = ctx.Attr<std::vector<float>>("variances");
-    auto is_clip = ctx.Attr<bool>("clip");
-
-    auto fixed_sizes = ctx.Attr<std::vector<float>>("fixed_sizes");
-    auto fixed_ratios = ctx.Attr<std::vector<float>>("fixed_ratios");
-    auto densities = ctx.Attr<std::vector<int>>("densities");
-
-    T step_w = static_cast<T>(ctx.Attr<float>("step_w"));
-    T step_h = static_cast<T>(ctx.Attr<float>("step_h"));
-    T offset = static_cast<T>(ctx.Attr<float>("offset"));
-
-    auto img_width = image->dims()[3];
-    auto img_height = image->dims()[2];
-
-    auto feature_width = input->dims()[3];
-    auto feature_height = input->dims()[2];
-
-    T step_width, step_height;
-    if (step_w == 0 || step_h == 0) {
-      step_width = static_cast<T>(img_width) / feature_width;
-      step_height = static_cast<T>(img_height) / feature_height;
-    } else {
-      step_width = step_w;
-      step_height = step_h;
-    }
-
-    int num_priors = 0;
-    for (size_t i = 0; i < densities.size(); ++i) {
-      num_priors += (fixed_ratios.size()) * (pow(densities[i], 2));
-    }
-    int step_average = static_cast<int>((step_width + step_height) * 0.5);
-
-    phi::DenseTensor h_temp;
-    T* tdata = h_temp.mutable_data<T>({num_priors * 4}, platform::CPUPlace());
-    int idx = 0;
-    for (size_t s = 0; s < fixed_sizes.size(); ++s) {
-      auto fixed_size = fixed_sizes[s];
-      int density = densities[s];
-      for (size_t r = 0; r < fixed_ratios.size(); ++r) {
-        float ar = fixed_ratios[r];
-        int shift = step_average / density;
-        float box_width_ratio = fixed_size * sqrt(ar);
-        float box_height_ratio = fixed_size / sqrt(ar);
-        for (int di = 0; di < density; ++di) {
-          for (int dj = 0; dj < density; ++dj) {
-            float center_x_temp = shift / 2. + dj * shift - step_average / 2.;
-            float center_y_temp = shift / 2. + di * shift - step_average / 2.;
-            tdata[idx] = box_width_ratio;
-            tdata[num_priors + idx] = box_height_ratio;
-            tdata[2 * num_priors + idx] = center_x_temp;
-            tdata[3 * num_priors + idx] = center_y_temp;
-            idx++;
-          }
-        }
-      }
-    }
-
-    boxes->mutable_data<T>(ctx.GetPlace());
-    vars->mutable_data<T>(ctx.GetPlace());
-
-    phi::DenseTensor d_temp;
-    framework::TensorCopy(h_temp, ctx.GetPlace(), &d_temp);
-
-    // At least use 32 threads, at most 512 threads.
-    // blockx is multiple of 32.
-    int blockx = std::min(
-        static_cast<int64_t>(((feature_width * num_priors + 31) >> 5) << 5),
-        static_cast<int64_t>(512L));
-    int gridx = (feature_width * num_priors + blockx - 1) / blockx;
-    dim3 threads(blockx, 1);
-    dim3 grids(gridx, feature_height);
-
-    auto stream = ctx.template device_context<phi::GPUContext>().stream();
-    GenDensityPriorBox<T><<<grids, threads, 0, stream>>>(feature_height,
-                                                         feature_width,
-                                                         img_height,
-                                                         img_width,
-                                                         offset,
-                                                         step_width,
-                                                         step_height,
-                                                         num_priors,
-                                                         d_temp.data<T>(),
-                                                         is_clip,
-                                                         variances[0],
-                                                         variances[1],
-                                                         variances[2],
-                                                         variances[3],
-                                                         boxes->data<T>(),
-                                                         vars->data<T>());
-  }
-};  // namespace operators
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-
-PD_REGISTER_STRUCT_KERNEL(density_prior_box,
-                          GPU,
-                          ALL_LAYOUT,
-                          ops::DensityPriorBoxOpCUDAKernel,
-                          float,
-                          double) {}
diff --git a/paddle/fluid/operators/detection/density_prior_box_op.h b/paddle/fluid/operators/detection/density_prior_box_op.h
deleted file mode 100644
index 995abf11200130..00000000000000
--- a/paddle/fluid/operators/detection/density_prior_box_op.h
+++ /dev/null
@@ -1,155 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-#include <algorithm>
-#include <vector>
-
-#include "paddle/fluid/operators/detection/prior_box_op.h"
-
-namespace paddle {
-namespace operators {
-
-template <typename T, typename DeviceContext>
-class DensityPriorBoxOpKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* input = ctx.Input<phi::DenseTensor>("Input");
-    auto* image = ctx.Input<phi::DenseTensor>("Image");
-    auto* boxes = ctx.Output<phi::DenseTensor>("Boxes");
-    auto* vars = ctx.Output<phi::DenseTensor>("Variances");
-
-    auto variances = ctx.Attr<std::vector<float>>("variances");
-    auto clip = ctx.Attr<bool>("clip");
-
-    auto fixed_sizes = ctx.Attr<std::vector<float>>("fixed_sizes");
-    auto fixed_ratios = ctx.Attr<std::vector<float>>("fixed_ratios");
-    auto densities = ctx.Attr<std::vector<int>>("densities");
-
-    T step_w = static_cast<T>(ctx.Attr<float>("step_w"));
-    T step_h = static_cast<T>(ctx.Attr<float>("step_h"));
-    T offset = static_cast<T>(ctx.Attr<float>("offset"));
-
-    auto img_width = image->dims()[3];
-    auto img_height = image->dims()[2];
-
-    auto feature_width = input->dims()[3];
-    auto feature_height = input->dims()[2];
-
-    T step_width, step_height;
-    if (step_w == 0 || step_h == 0) {
-      step_width = static_cast<T>(img_width) / feature_width;
-      step_height = static_cast<T>(img_height) / feature_height;
-    } else {
-      step_width = step_w;
-      step_height = step_h;
-    }
-    int num_priors = 0;
-
-#ifdef PADDLE_WITH_MKLML
-#pragma omp parallel for reduction(+ : num_priors)
-#endif
-    for (size_t i = 0; i < densities.size(); ++i) {
-      num_priors += (fixed_ratios.size()) * (pow(densities[i], 2));
-    }
-
-    boxes->mutable_data<T>(ctx.GetPlace());
-    vars->mutable_data<T>(ctx.GetPlace());
-
-    auto box_dim = vars->dims();
-    boxes->Resize({feature_height, feature_width, num_priors, 4});
-    auto e_boxes = phi::EigenTensor<T, 4>::From(*boxes).setConstant(0.0);
-    int step_average = static_cast<int>((step_width + step_height) * 0.5);
-
-    std::vector<float> sqrt_fixed_ratios;
-#ifdef PADDLE_WITH_MKLML
-#pragma omp parallel for
-#endif
-    for (size_t i = 0; i < fixed_ratios.size(); i++) {
-      sqrt_fixed_ratios.push_back(sqrt(fixed_ratios[i]));
-    }
-
-#ifdef PADDLE_WITH_MKLML
-#pragma omp parallel for collapse(2)
-#endif
-    for (int h = 0; h < feature_height; ++h) {
-      for (int w = 0; w < feature_width; ++w) {
-        T center_x = (w + offset) * step_width;
-        T center_y = (h + offset) * step_height;
-        int idx = 0;
-        // Generate density prior boxes with fixed sizes.
-        for (size_t s = 0; s < fixed_sizes.size(); ++s) {
-          auto fixed_size = fixed_sizes[s];
-          int density = densities[s];
-          int shift = step_average / density;
-          // Generate density prior boxes with fixed ratios.
-          for (size_t r = 0; r < fixed_ratios.size(); ++r) {
-            float box_width_ratio = fixed_size * sqrt_fixed_ratios[r];
-            float box_height_ratio = fixed_size / sqrt_fixed_ratios[r];
-            float density_center_x = center_x - step_average / 2. + shift / 2.;
-            float density_center_y = center_y - step_average / 2. + shift / 2.;
-            for (int di = 0; di < density; ++di) {
-              for (int dj = 0; dj < density; ++dj) {
-                float center_x_temp = density_center_x + dj * shift;
-                float center_y_temp = density_center_y + di * shift;
-                e_boxes(h, w, idx, 0) = std::max(
-                    (center_x_temp - box_width_ratio / 2.) / img_width, 0.);
-                e_boxes(h, w, idx, 1) = std::max(
-                    (center_y_temp - box_height_ratio / 2.) / img_height, 0.);
-                e_boxes(h, w, idx, 2) = std::min(
-                    (center_x_temp + box_width_ratio / 2.) / img_width, 1.);
-                e_boxes(h, w, idx, 3) = std::min(
-                    (center_y_temp + box_height_ratio / 2.) / img_height, 1.);
-                idx++;
-              }
-            }
-          }
-        }
-      }
-    }
-    if (clip) {
-      T* dt = boxes->data<T>();
-      std::transform(dt, dt + boxes->numel(), dt, [](T v) -> T {
-        return std::min<T>(std::max<T>(v, 0.), 1.);
-      });
-    }
-    phi::DenseTensor var_t;
-    var_t.mutable_data<T>(
-        common::make_ddim({1, static_cast<int>(variances.size())}),
-        ctx.GetPlace());
-
-    auto var_et = phi::EigenTensor<T, 2>::From(var_t);
-
-    for (size_t i = 0; i < variances.size(); ++i) {
-      var_et(0, i) = variances[i];
-    }
-
-    int box_num = feature_height * feature_width * num_priors;
-    auto var_dim = vars->dims();
-    vars->Resize({box_num, static_cast<int>(variances.size())});
-
-    auto e_vars = phi::EigenMatrix<T, Eigen::RowMajor>::From(*vars);
-#ifdef PADDLE_WITH_MKLML
-#pragma omp parallel for collapse(2)
-#endif
-    for (int i = 0; i < box_num; ++i) {
-      for (size_t j = 0; j < variances.size(); ++j) {
-        e_vars(i, j) = variances[j];
-      }
-    }
-
-    vars->Resize(var_dim);
-    boxes->Resize(box_dim);
-  }
-};  // namespace operators
-
-}  // namespace operators
-}  // namespace paddle
diff --git a/paddle/fluid/operators/detection/generate_mask_labels_op.cc b/paddle/fluid/operators/detection/generate_mask_labels_op.cc
deleted file mode 100644
index 5ee843d72387bb..00000000000000
--- a/paddle/fluid/operators/detection/generate_mask_labels_op.cc
+++ /dev/null
@@ -1,547 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include <cmath>
-
-#include <algorithm>
-#include <string>
-#include <vector>
-
-#include "paddle/fluid/framework/lod_tensor.h"
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/detection/bbox_util.h"
-#include "paddle/fluid/operators/detection/mask_util.h"
-#include "paddle/fluid/operators/math/concat_and_split.h"
-#include "paddle/phi/kernels/funcs/math_function.h"
-
-namespace paddle {
-namespace operators {
-
-const int kBoxDim = 4;
-
-template <typename T>
-void AppendMask(phi::DenseTensor* out,
-                int64_t offset,
-                phi::DenseTensor* to_add) {
-  auto* out_data = out->data<T>();
-  auto* to_add_data = to_add->data<T>();
-  memcpy(out_data + offset, to_add_data, to_add->numel() * sizeof(T));
-}
-
-class GenerateMaskLabelsOp : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInput("ImInfo"),
-        true,
-        phi::errors::InvalidArgument("Input(ImInfo) shouldn't be null."));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInput("GtClasses"),
-        true,
-        phi::errors::InvalidArgument("Input(GtClasses) shouldn't be null."));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInput("IsCrowd"),
-        true,
-        phi::errors::InvalidArgument("Input(IsCrowd) shouldn't be null."));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInput("GtSegms"),
-        true,
-        phi::errors::InvalidArgument("Input(GtSegms) shouldn't be null."));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInput("Rois"),
-        true,
-        phi::errors::InvalidArgument("Input(Rois) shouldn't be null."));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInput("LabelsInt32"),
-        true,
-        phi::errors::InvalidArgument("Input(LabelsInt32) shouldn't be null."));
-
-    PADDLE_ENFORCE_EQ(
-        ctx->HasOutput("MaskRois"),
-        true,
-        phi::errors::InvalidArgument(
-            "Output(MaskRois) of GenerateMaskLabelsOp should not be null"));
-    PADDLE_ENFORCE_EQ(ctx->HasOutput("RoiHasMaskInt32"),
-                      true,
-                      phi::errors::InvalidArgument(
-                          "Output(RoiHasMaskInt32) of GenerateMaskLabelsOp "
-                          "should not be null"));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasOutput("MaskInt32"),
-        true,
-        phi::errors::InvalidArgument(
-            "Output(MaskInt32) of GenerateMaskLabelsOp should not be null"));
-
-    auto im_info_dims = ctx->GetInputDim("ImInfo");
-    auto gt_segms_dims = ctx->GetInputDim("GtSegms");
-    PADDLE_ENFORCE_EQ(
-        im_info_dims.size(),
-        2,
-        phi::errors::InvalidArgument("The rank of Input(ImInfo) must be 2."));
-    PADDLE_ENFORCE_EQ(
-        gt_segms_dims.size(),
-        2,
-        phi::errors::InvalidArgument("The rank of Input(GtSegms) must be 2."));
-    PADDLE_ENFORCE_EQ(gt_segms_dims[1],
-                      2,
-                      phi::errors::InvalidArgument(
-                          "The second dim of Input(GtSegms) must be 2."));
-    int num_classes = ctx->Attrs().Get<int>("num_classes");
-    int resolution = ctx->Attrs().Get<int>("resolution");
-
-    ctx->SetOutputDim("MaskRois", {-1, 4});
-    ctx->SetOutputDim("RoiHasMaskInt32", {-1, 1});
-    ctx->SetOutputDim("MaskInt32", {-1, num_classes * resolution * resolution});
-    if (!ctx->IsRuntime()) {
-      ctx->SetLoDLevel("MaskRois", ctx->GetLoDLevel("Rois"));
-      ctx->SetLoDLevel("RoiHasMaskInt32", ctx->GetLoDLevel("Rois"));
-      ctx->SetLoDLevel("MaskInt32", ctx->GetLoDLevel("Rois"));
-    }
-  }
-
- protected:
-  phi::KernelKey GetExpectedKernelType(
-      const framework::ExecutionContext& ctx) const override {
-    auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Rois");
-    return phi::KernelKey(data_type, platform::CPUPlace());
-  }
-};
-
-/*
- * Expand masks from shape (#masks, M ** 2) to (#masks, #classes * M ** 2)
- * to encode class specific mask targets.
- */
-template <typename T>
-static inline void ExpandMaskTarget(const phi::CPUContext& ctx,
-                                    const phi::DenseTensor& masks,
-                                    const phi::DenseTensor& mask_class_labels,
-                                    const int resolution,
-                                    const int num_classes,
-                                    phi::DenseTensor* mask_targets) {
-  const uint8_t* masks_data = masks.data<uint8_t>();
-  int64_t num_mask = masks.dims()[0];
-  const int* mask_class_labels_data = mask_class_labels.data<int>();
-  const int M = resolution * resolution;
-  const int mask_dim = M * num_classes;
-
-  int* mask_targets_data =
-      mask_targets->mutable_data<int>({num_mask, mask_dim}, ctx.GetPlace());
-  phi::funcs::set_constant(ctx, mask_targets, static_cast<int>(-1));
-  for (int64_t mask_id = 0; mask_id < num_mask; ++mask_id) {
-    int cls = mask_class_labels_data[mask_id];
-    int start = M * cls;
-    if (cls > 0) {
-      for (int i = 0; i < M; ++i) {
-        mask_targets_data[mask_id * mask_dim + start + i] =
-            static_cast<int>(masks_data[mask_id * M + i]);
-      }
-    }
-  }
-}
-
-template <typename T>
-std::vector<phi::DenseTensor> SampleMaskForOneImage(
-    const phi::CPUContext& ctx,
-    const phi::DenseTensor& im_info,
-    const phi::DenseTensor& gt_classes,
-    const phi::DenseTensor& is_crowd,
-    const phi::DenseTensor& gt_segms,
-    const phi::DenseTensor& rois,
-    const phi::DenseTensor& label_int32,
-    const int num_classes,
-    const int resolution,
-    const framework::LoD& segm_length) {
-  // Prepare the mask targets by associating one gt mask to each training roi
-  // that has a fg (non-bg) class label.
-  const int64_t gt_size = static_cast<int64_t>(gt_classes.dims()[0]);
-  const int64_t roi_size = static_cast<int64_t>(rois.dims()[0]);
-  const int* gt_classes_data = gt_classes.data<int>();
-  const int* is_crowd_data = is_crowd.data<int>();
-  const int* label_int32_data = label_int32.data<int>();
-  PADDLE_ENFORCE_EQ(roi_size,
-                    label_int32.dims()[0],
-                    phi::errors::InvalidArgument(
-                        "The first dim of label [%d] is the different from "
-                        "roi_size [%d], they should be same.",
-                        label_int32.dims()[0],
-                        roi_size));
-
-  std::vector<int> mask_gt_inds, fg_inds;
-  std::vector<std::vector<std::vector<T>>> gt_polys;
-
-  auto polys_num = segm_length[1];
-  auto segm_lod_offset = framework::ConvertToOffsetBasedLoD(segm_length);
-  auto lod1 = segm_lod_offset[1];
-  auto lod2 = segm_lod_offset[2];
-  const T* polys_data = gt_segms.data<T>();
-  for (int64_t i = 0; i < gt_size; ++i) {
-    if ((gt_classes_data[i] > 0) && (is_crowd_data[i] == 0)) {
-      mask_gt_inds.emplace_back(i);
-
-      // slice fg segmentation polys
-      int poly_num = static_cast<int>(polys_num[i]);
-      std::vector<std::vector<T>> polys;
-      int s_idx = static_cast<int>(lod1[i]);
-      for (int j = 0; j < poly_num; ++j) {
-        int s = static_cast<int>(lod2[s_idx + j]);
-        int e = static_cast<int>(lod2[s_idx + j + 1]);
-        PADDLE_ENFORCE_NE(s,
-                          e,
-                          phi::errors::InvalidArgument(
-                              "The start point and the end point in the poly "
-                              "segment [%d] should not be same, but received "
-                              "the start point [%d] and the end point [%d].",
-                              i,
-                              s,
-                              e));
-        std::vector<T> plts(polys_data + s * 2, polys_data + e * 2);
-        polys.push_back(plts);
-      }
-      gt_polys.push_back(polys);
-    }
-  }
-  for (int64_t i = 0; i < roi_size; ++i) {
-    if (label_int32_data[i] > 0) {
-      fg_inds.emplace_back(i);
-    }
-  }
-  int gt_num = static_cast<int>(mask_gt_inds.size());
-  int fg_num = static_cast<int>(fg_inds.size());
-
-  phi::DenseTensor boxes_from_polys;
-  boxes_from_polys.mutable_data<T>({gt_num, 4}, platform::CPUPlace());
-  Poly2Boxes(gt_polys, boxes_from_polys.data<T>());
-
-  std::vector<int> roi_has_mask =
-      std::vector<int>(fg_inds.begin(), fg_inds.end());
-  phi::DenseTensor mask_class_labels;
-  phi::DenseTensor masks;
-  phi::DenseTensor rois_fg;
-
-  auto im_scale = im_info.data<T>()[2];
-  if (fg_num > 0) {
-    // Class labels for the foreground rois
-    mask_class_labels.mutable_data<int>({fg_num, 1}, ctx.GetPlace());
-    Gather<int>(label_int32_data,
-                1,
-                fg_inds.data(),
-                static_cast<int>(fg_inds.size()),
-                mask_class_labels.data<int>());
-
-    uint8_t* masks_data = masks.mutable_data<uint8_t>(
-        {fg_num, resolution * resolution}, ctx.GetPlace());
-
-    // Find overlap between all foreground rois and the bounding boxes
-    // enclosing each segmentation
-    T* rois_fg_data = rois_fg.mutable_data<T>({fg_num, 4}, ctx.GetPlace());
-    Gather<T>(
-        rois.data<T>(), 4, fg_inds.data(), fg_inds.size(), rois_fg.data<T>());
-
-    for (int k = 0; k < rois_fg.numel(); ++k) {
-      rois_fg_data[k] = rois_fg_data[k] / im_scale;
-    }
-
-    phi::DenseTensor overlaps_bbfg_bbpolys;
-    overlaps_bbfg_bbpolys.mutable_data<T>({fg_num, gt_num}, ctx.GetPlace());
-    BboxOverlaps<T>(rois_fg, boxes_from_polys, &overlaps_bbfg_bbpolys);
-
-    // Map from each fg rois to the index of the mask with highest overlap
-    // (measured by bbox overlap)
-    T* overlaps_bbfg_bbpolys_data = overlaps_bbfg_bbpolys.data<T>();
-    std::vector<int> fg_masks_inds;
-    for (int64_t i = 0; i < fg_num; ++i) {
-      const T* v = overlaps_bbfg_bbpolys_data + i * gt_num;
-      T max_overlap = std::numeric_limits<T>::min();
-      int id = 0;
-      for (int64_t j = 0; j < gt_num; ++j) {
-        if (v[j] > max_overlap) {
-          max_overlap = v[j];
-          id = static_cast<int>(j);
-        }
-      }
-      fg_masks_inds.push_back(id);
-    }
-
-    // add fg targets
-    for (int64_t i = 0; i < fg_num; ++i) {
-      int fg_polys_ind = fg_masks_inds[i];
-      T* roi_fg = rois_fg_data + i * 4;
-      uint8_t* mask = masks_data + i * resolution * resolution;
-      Polys2MaskWrtBox(gt_polys[fg_polys_ind], roi_fg, resolution, mask);
-    }
-  } else {
-    // The network cannot handle empty blobs, so we must provide a mask
-    // We simply take the first bg roi, given it an all -1's mask (ignore
-    // label), and label it with class zero (bg).
-    int bg_num = 1;
-    T* rois_fg_data = rois_fg.mutable_data<T>({bg_num, 4}, ctx.GetPlace());
-    const T* rois_data = rois.data<T>();
-    std::vector<int> bg_inds;
-    for (int64_t i = 0; i < roi_size; ++i) {
-      if (label_int32_data[i] == 0) {
-        bg_inds.emplace_back(i);
-        rois_fg_data[0] = rois_data[0] / im_scale;
-        rois_fg_data[1] = rois_data[1] / im_scale;
-        rois_fg_data[2] = rois_data[2] / im_scale;
-        rois_fg_data[3] = rois_data[3] / im_scale;
-        break;
-      }
-    }
-    masks.mutable_data<uint8_t>({bg_num, resolution * resolution},
-                                ctx.GetPlace());
-    phi::funcs::set_constant(ctx, &masks, static_cast<uint8_t>(-1));
-    int* mask_class_labels_data =
-        mask_class_labels.mutable_data<int>({bg_num, 1}, ctx.GetPlace());
-    mask_class_labels_data[0] = 0;
-    roi_has_mask = std::vector<int>(bg_inds.begin(), bg_inds.end());
-  }
-
-  phi::DenseTensor masks_expand;
-  ExpandMaskTarget<T>(
-      ctx, masks, mask_class_labels, resolution, num_classes, &masks_expand);
-
-  T* rois_fg_data = rois_fg.data<T>();
-  for (int k = 0; k < rois_fg.numel(); ++k) {
-    rois_fg_data[k] = rois_fg_data[k] * im_scale;
-  }
-
-  phi::DenseTensor roi_has_mask_t;
-  int roi_has_mask_size = static_cast<int>(roi_has_mask.size());
-  int* roi_has_mask_data =
-      roi_has_mask_t.mutable_data<int>({roi_has_mask_size, 1}, ctx.GetPlace());
-  std::copy(roi_has_mask.begin(), roi_has_mask.end(), roi_has_mask_data);
-
-  std::vector<phi::DenseTensor> res;
-  res.emplace_back(rois_fg);
-  res.emplace_back(roi_has_mask_t);
-  res.emplace_back(masks_expand);
-  return res;
-}
-
-template <typename T, typename DeviceContext>
-class GenerateMaskLabelsKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* im_info = ctx.Input<phi::DenseTensor>("ImInfo");
-    auto* gt_classes = ctx.Input<phi::DenseTensor>("GtClasses");
-    auto* is_crowd = ctx.Input<phi::DenseTensor>("IsCrowd");
-    auto* gt_segms = ctx.Input<phi::DenseTensor>("GtSegms");
-    auto* rois = ctx.Input<phi::DenseTensor>("Rois");
-    auto* label_int32 = ctx.Input<phi::DenseTensor>("LabelsInt32");
-
-    auto* mask_rois = ctx.Output<phi::DenseTensor>("MaskRois");
-    auto* roi_has_mask_int32 = ctx.Output<phi::DenseTensor>("RoiHasMaskInt32");
-    auto* mask_int32 = ctx.Output<phi::DenseTensor>("MaskInt32");
-
-    int num_classes = ctx.Attr<int>("num_classes");
-    int resolution = ctx.Attr<int>("resolution");
-
-    PADDLE_ENFORCE_EQ(
-        gt_classes->lod().size(),
-        1UL,
-        phi::errors::InvalidArgument(
-            "GenerateMaskLabelsOp gt_classes needs 1 level of LoD"));
-    PADDLE_ENFORCE_EQ(
-        is_crowd->lod().size(),
-        1UL,
-        phi::errors::InvalidArgument(
-            "GenerateMaskLabelsOp is_crowd needs 1 level of LoD"));
-    PADDLE_ENFORCE_EQ(rois->lod().size(),
-                      1UL,
-                      phi::errors::InvalidArgument(
-                          "GenerateMaskLabelsOp rois needs 1 level of LoD"));
-    PADDLE_ENFORCE_EQ(
-        label_int32->lod().size(),
-        1UL,
-        phi::errors::InvalidArgument(
-            "GenerateMaskLabelsOp label_int32 needs 1 level of LoD"));
-
-    PADDLE_ENFORCE_EQ(
-        gt_segms->lod().size(),
-        3UL,
-        phi::errors::InvalidArgument(
-            "GenerateMaskLabelsOp gt_segms needs 3 level of LoD"));
-
-    int64_t n = static_cast<int64_t>(gt_classes->lod().back().size() - 1);
-    PADDLE_ENFORCE_EQ(
-        gt_segms->lod()[0].size() - 1,
-        n,
-        phi::errors::InvalidArgument(
-            "Batchsize of Input(gt_segms) and Input(gt_classes) should be "
-            "same, but received gt_segms[%d], gt_classes[%d].",
-            gt_segms->lod()[0].size() - 1,
-            n));
-
-    int mask_dim = num_classes * resolution * resolution;
-    int roi_num = static_cast<int>(rois->lod().back()[n]);
-    mask_rois->mutable_data<T>({roi_num, kBoxDim}, ctx.GetPlace());
-    roi_has_mask_int32->mutable_data<int>({roi_num, 1}, ctx.GetPlace());
-    mask_int32->mutable_data<int>({roi_num, mask_dim}, ctx.GetPlace());
-
-    framework::LoD lod;
-    std::vector<size_t> lod0(1, 0);
-
-    int64_t num_mask = 0;
-    auto& dev_ctx = ctx.device_context<phi::CPUContext>();
-
-    auto gt_classes_lod = gt_classes->lod().back();
-    auto is_crowd_lod = is_crowd->lod().back();
-    auto rois_lod = rois->lod().back();
-    auto label_int32_lod = label_int32->lod().back();
-    auto gt_segms_lod = gt_segms->lod();
-
-    for (int i = 0; i < n; ++i) {
-      if (rois_lod[i] == rois_lod[i + 1]) {
-        lod0.emplace_back(num_mask);
-        continue;
-      }
-      phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1);
-      phi::DenseTensor gt_classes_slice =
-          gt_classes->Slice(static_cast<int64_t>(gt_classes_lod[i]),
-                            static_cast<int64_t>(gt_classes_lod[i + 1]));
-      phi::DenseTensor is_crowd_slice =
-          is_crowd->Slice(static_cast<int64_t>(is_crowd_lod[i]),
-                          static_cast<int64_t>(is_crowd_lod[i + 1]));
-      phi::DenseTensor label_int32_slice =
-          label_int32->Slice(static_cast<int64_t>(label_int32_lod[i]),
-                             static_cast<int64_t>(label_int32_lod[i + 1]));
-      phi::DenseTensor rois_slice =
-          rois->Slice(static_cast<int64_t>(rois_lod[i]),
-                      static_cast<int64_t>(rois_lod[i + 1]));
-
-      auto sub_lod_and_offset =
-          framework::GetSubLoDAndAbsoluteOffset(gt_segms_lod, i, i + 1, 0);
-      auto lod_length = sub_lod_and_offset.first;
-      size_t s = sub_lod_and_offset.second.first;
-      size_t e = sub_lod_and_offset.second.second;
-      phi::DenseTensor gt_segms_slice =
-          gt_segms->Slice(static_cast<int64_t>(s), static_cast<int64_t>(e));
-
-      std::vector<phi::DenseTensor> tensor_output =
-          SampleMaskForOneImage<T>(dev_ctx,
-                                   im_info_slice,
-                                   gt_classes_slice,
-                                   is_crowd_slice,
-                                   gt_segms_slice,
-                                   rois_slice,
-                                   label_int32_slice,
-                                   num_classes,
-                                   resolution,
-                                   lod_length);
-
-      phi::DenseTensor sampled_mask_rois = tensor_output[0];
-      phi::DenseTensor sampled_roi_has_mask_int32 = tensor_output[1];
-      phi::DenseTensor sampled_mask_int32 = tensor_output[2];
-
-      AppendMask<T>(mask_rois, kBoxDim * num_mask, &sampled_mask_rois);
-      AppendMask<int>(
-          roi_has_mask_int32, num_mask, &sampled_roi_has_mask_int32);
-      AppendMask<int>(mask_int32, mask_dim * num_mask, &sampled_mask_int32);
-
-      num_mask += sampled_mask_rois.dims()[0];
-      lod0.emplace_back(num_mask);
-    }
-
-    lod.emplace_back(lod0);
-    mask_rois->set_lod(lod);
-    roi_has_mask_int32->set_lod(lod);
-    mask_int32->set_lod(lod);
-    mask_rois->Resize({num_mask, kBoxDim});
-    roi_has_mask_int32->Resize({num_mask, 1});
-    mask_int32->Resize({num_mask, mask_dim});
-  }
-};
-
-class GenerateMaskLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
- public:
-  void Make() override {
-    AddInput("ImInfo",
-             "(Tensor), This input is a 2D Tensor with shape [B, 3]. "
-             "B is the number of input images, "
-             "each element consists of im_height, im_width, im_scale.");
-    AddInput("GtClasses",
-             "(phi::DenseTensor), This input is a 2D phi::DenseTensor with "
-             "shape [M, 1]. "
-             "M is the number of groundtruth, "
-             "each element is a class label of groundtruth.");
-    AddInput(
-        "IsCrowd",
-        "(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape "
-        "[M, 1]. "
-        "M is the number of groundtruth, "
-        "each element is a flag indicates whether a groundtruth is crowd.");
-    AddInput(
-        "GtSegms",
-        "(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape "
-        "[S, 2], it's LoD "
-        "level is 3. The LoD[0] represents the gt objects number of each "
-        "instance. LoD[1] represents the segmentation counts of each objects. "
-        "LoD[2] represents the polygons number of each segmentation. S the "
-        "total number of polygons coordinate points. Each element is (x, y) "
-        "coordinate points.");
-    AddInput(
-        "Rois",
-        "(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape "
-        "[R, 4]. "
-        "R is the number of rois which is the output of "
-        "generate_proposal_labels, "
-        "each element is a bounding box with (xmin, ymin, xmax, ymax) format.");
-    AddInput("LabelsInt32",
-             "(phi::DenseTensor), This intput is a 2D phi::DenseTensor with "
-             "shape [R, 1], "
-             "each element represents a class label of a roi");
-    AddOutput(
-        "MaskRois",
-        "(phi::DenseTensor), This output is a 2D phi::DenseTensor with shape "
-        "[P, 4]. "
-        "P is the number of mask, "
-        "each element is a bounding box with [xmin, ymin, xmax, ymax] format.");
-    AddOutput("RoiHasMaskInt32",
-              "(phi::DenseTensor), This output is a 2D phi::DenseTensor with "
-              "shape [P, 1], "
-              "each element represents the output mask rois index with regard "
-              "to input rois");
-    AddOutput("MaskInt32",
-              "(phi::DenseTensor), This output is a 4D phi::DenseTensor with "
-              "shape [P, Q], "
-              "Q equal to num_classes * resolution * resolution");
-
-    AddAttr<int>("num_classes", "Class number.");
-    AddAttr<int>("resolution", "Resolution of mask.");
-
-    AddComment(R"DOC(
-This operator can be, for given the RoIs and corresponding labels,
-to sample foreground RoIs. This mask branch also has
-a :math: `K \\times M^{2}` dimensional output targets for each foreground
-RoI, which encodes K binary masks of resolution M x M, one for each of the
-K classes. This mask targets are used to compute loss of mask branch.
-    )DOC");
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-REGISTER_OPERATOR(
-    generate_mask_labels,
-    ops::GenerateMaskLabelsOp,
-    ops::GenerateMaskLabelsOpMaker,
-    paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
-    paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
-
-PD_REGISTER_STRUCT_KERNEL(generate_mask_labels,
-                          CPU,
-                          ALL_LAYOUT,
-                          ops::GenerateMaskLabelsKernel,
-                          float) {}
diff --git a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc b/paddle/fluid/operators/detection/generate_proposal_labels_op.cc
deleted file mode 100644
index ad37aa2ae682f7..00000000000000
--- a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc
+++ /dev/null
@@ -1,837 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include <cmath>
-
-#include <algorithm>
-#include <string>
-#include <vector>
-
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/operators/detection/bbox_util.h"
-#include "paddle/fluid/operators/math/concat_and_split.h"
-#include "paddle/phi/kernels/funcs/gather.h"
-#include "paddle/phi/kernels/funcs/math_function.h"
-
-namespace paddle {
-namespace operators {
-
-const int kBoxDim = 4;
-
-template <typename T>
-void AppendRois(phi::DenseTensor* out,
-                int64_t offset,
-                phi::DenseTensor* to_add) {
-  auto* out_data = out->data<T>();
-  auto* to_add_data = to_add->data<T>();
-  memcpy(out_data + offset, to_add_data, to_add->numel() * sizeof(T));
-}
-
-// Filter the ground-truth in RoIs and the RoIs with non-positive area.
-// The ground-truth has max overlap with itself so the max_overlap is 1
-// and the corresponding RoI will be removed.
-template <typename T>
-void FilterRoIs(const platform::DeviceContext& ctx,
-                const phi::DenseTensor& rpn_rois,
-                const phi::DenseTensor& max_overlap,
-                phi::DenseTensor* keep) {
-  const T* rpn_rois_dt = rpn_rois.data<T>();
-  const T* max_overlap_dt = max_overlap.data<T>();
-  int rois_num = static_cast<int>(max_overlap.numel());
-  keep->Resize({rois_num});
-  int* keep_data = keep->mutable_data<int>(ctx.GetPlace());
-  int keep_len = 0;
-  for (int i = 0; i < rois_num; ++i) {
-    if ((rpn_rois_dt[i * 4 + 2] - rpn_rois_dt[i * 4 + 0] + 1) > 0 &&
-        (rpn_rois_dt[i * 4 + 3] - rpn_rois_dt[i * 4 + 1] + 1) > 0 &&
-        max_overlap_dt[i] < 1.) {
-      keep_data[keep_len++] = i;
-    }
-  }
-  keep->Resize({keep_len});
-}
-
-class GenerateProposalLabelsOp : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInput("RpnRois"),
-        true,
-        phi::errors::NotFound("Input(RpnRois) shouldn't be null."));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInput("GtClasses"),
-        true,
-        phi::errors::NotFound("Input(GtClasses) shouldn't be null."));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInput("IsCrowd"),
-        true,
-        phi::errors::NotFound("Input(IsCrowd) shouldn't be null."));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInput("GtBoxes"),
-        true,
-        phi::errors::NotFound("Input(GtBoxes) shouldn't be null."));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInput("ImInfo"),
-        true,
-        phi::errors::NotFound("Input(ImInfo) shouldn't be null."));
-
-    PADDLE_ENFORCE_EQ(
-        ctx->HasOutput("Rois"),
-        true,
-        phi::errors::NotFound(
-            "Output(Rois) of GenerateProposalLabelsOp should not be null"));
-    PADDLE_ENFORCE_EQ(ctx->HasOutput("LabelsInt32"),
-                      true,
-                      phi::errors::NotFound("Output(LabelsInt32) of "
-                                            "GenerateProposalLabelsOp "
-                                            "should not be null"));
-    PADDLE_ENFORCE_EQ(ctx->HasOutput("BboxTargets"),
-                      true,
-                      phi::errors::NotFound("Output(BboxTargets) of "
-                                            "GenerateProposalLabelsOp "
-                                            "should not be null"));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasOutput("BboxInsideWeights"),
-        true,
-        phi::errors::NotFound(
-            "Output(BboxInsideWeights) of GenerateProposalLabelsOp "
-            "should not be null"));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasOutput("BboxOutsideWeights"),
-        true,
-        phi::errors::NotFound(
-            "Output(BboxOutsideWeights) of GenerateProposalLabelsOp "
-            "should not be null"));
-
-    auto rpn_rois_dims = ctx->GetInputDim("RpnRois");
-    auto gt_boxes_dims = ctx->GetInputDim("GtBoxes");
-    auto im_info_dims = ctx->GetInputDim("ImInfo");
-
-    PADDLE_ENFORCE_EQ(rpn_rois_dims.size(),
-                      2,
-                      phi::errors::InvalidArgument(
-                          "The dimensions size of Input(RpnRois) must be 2. "
-                          "But received dimensions size=[%d], dimensions=[%s].",
-                          rpn_rois_dims.size(),
-                          rpn_rois_dims));
-    PADDLE_ENFORCE_EQ(gt_boxes_dims.size(),
-                      2,
-                      phi::errors::InvalidArgument(
-                          "The dimensions size of Input(GtBoxes) must be 2. "
-                          "But received dimensions size=[%d], dimensions=[%s].",
-                          gt_boxes_dims.size(),
-                          gt_boxes_dims));
-    PADDLE_ENFORCE_EQ(im_info_dims.size(),
-                      2,
-                      phi::errors::InvalidArgument(
-                          "The dimensions size of Input(ImInfo) must be 2. But "
-                          "received dimensions size=[%d], dimensions=[%s].",
-                          im_info_dims.size(),
-                          im_info_dims));
-
-    int class_nums = ctx->Attrs().Get<int>("class_nums");
-    bool is_cascade_rcnn = ctx->Attrs().Get<bool>("is_cascade_rcnn");
-    if (is_cascade_rcnn) {
-      PADDLE_ENFORCE_EQ(
-          ctx->HasInput("MaxOverlap"),
-          true,
-          phi::errors::NotFound(
-              "Input(MaxOverlap) of GenerateProposalLabelsOp "
-              "should not be null when is_cascade_rcnn is True."));
-    }
-
-    ctx->SetOutputDim("Rois", {-1, 4});
-    ctx->SetOutputDim("LabelsInt32", {-1, 1});
-    ctx->SetOutputDim("BboxTargets", {-1, 4 * class_nums});
-    ctx->SetOutputDim("BboxInsideWeights", {-1, 4 * class_nums});
-    ctx->SetOutputDim("BboxOutsideWeights", {-1, 4 * class_nums});
-    ctx->SetOutputDim("MaxOverlapWithGT", {-1});
-  }
-
- protected:
-  phi::KernelKey GetExpectedKernelType(
-      const framework::ExecutionContext& ctx) const override {
-    auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "RpnRois");
-    return phi::KernelKey(data_type, platform::CPUPlace());
-  }
-};
-
-template <typename T>
-void Concat(const phi::CPUContext& context,
-            const phi::DenseTensor& in_tensor_a,
-            const phi::DenseTensor& in_tensor_b,
-            phi::DenseTensor* out_tensor) {
-  int axis = 0;
-  std::vector<phi::DenseTensor> inputs;
-  inputs.emplace_back(in_tensor_a);
-  inputs.emplace_back(in_tensor_b);
-  math::ConcatFunctor<phi::CPUContext, T> concat_functor;
-  concat_functor(context, inputs, axis, out_tensor);
-}
-
-template <typename T>
-std::vector<std::vector<int>> SampleFgBgGt(const phi::CPUContext& context,
-                                           phi::DenseTensor* iou,
-                                           const phi::DenseTensor& is_crowd,
-                                           const int batch_size_per_im,
-                                           const float fg_fraction,
-                                           const float fg_thresh,
-                                           const float bg_thresh_hi,
-                                           const float bg_thresh_lo,
-                                           std::minstd_rand engine,
-                                           const bool use_random,
-                                           const bool is_cascade_rcnn,
-                                           const phi::DenseTensor& rpn_rois) {
-  std::vector<int> fg_inds;
-  std::vector<int> bg_inds;
-  std::vector<int> mapped_gt_inds;
-  int64_t gt_num = is_crowd.numel();
-  const int* crowd_data = is_crowd.data<int>();
-  T* proposal_to_gt_overlaps = iou->data<T>();
-  int64_t row = iou->dims()[0];
-  int64_t col = iou->dims()[1];
-  float epsilon = 0.00001;
-  // Follow the Faster RCNN's implementation
-  for (int64_t i = 0; i < row; ++i) {
-    const T* v = proposal_to_gt_overlaps + i * col;
-
-    T max_overlap = *std::max_element(v, v + col);
-    if ((i < gt_num) && (crowd_data[i])) {
-      max_overlap = -1.0;
-    }
-    if (max_overlap >= fg_thresh) {
-      // fg mapped gt label index
-      for (int64_t j = 0; j < col; ++j) {
-        T val = proposal_to_gt_overlaps[i * col + j];
-        auto diff = std::abs(max_overlap - val);
-        if (diff < epsilon) {
-          fg_inds.emplace_back(i);
-          mapped_gt_inds.emplace_back(j);
-          break;
-        }
-      }
-    } else if ((max_overlap >= bg_thresh_lo) && (max_overlap < bg_thresh_hi)) {
-      bg_inds.emplace_back(i);
-    } else {
-      continue;
-    }
-  }
-
-  std::vector<std::vector<int>> res;
-  if (is_cascade_rcnn) {
-    res.emplace_back(fg_inds);
-    res.emplace_back(bg_inds);
-    res.emplace_back(mapped_gt_inds);
-  } else {
-    // Reservoir Sampling
-    // sampling fg
-    std::uniform_real_distribution<float> uniform(0, 1);
-    int fg_rois_per_im = std::floor(batch_size_per_im * fg_fraction);  // NOLINT
-    int fg_rois_this_image = static_cast<int>(fg_inds.size());
-    int fg_rois_per_this_image = std::min(fg_rois_per_im, fg_rois_this_image);
-    if (use_random) {
-      const int64_t fg_size = static_cast<int64_t>(fg_inds.size());
-      if (fg_size > fg_rois_per_this_image) {
-        for (int64_t i = fg_rois_per_this_image; i < fg_size; ++i) {
-          int rng_ind = std::floor(uniform(engine) * i);  // NOLINT
-          if (rng_ind < fg_rois_per_this_image) {
-            std::iter_swap(fg_inds.begin() + rng_ind, fg_inds.begin() + i);
-            std::iter_swap(mapped_gt_inds.begin() + rng_ind,
-                           mapped_gt_inds.begin() + i);
-          }
-        }
-      }
-    }
-    std::vector<int> new_fg_inds(fg_inds.begin(),
-                                 fg_inds.begin() + fg_rois_per_this_image);
-    std::vector<int> new_gt_inds(
-        mapped_gt_inds.begin(),
-        mapped_gt_inds.begin() + fg_rois_per_this_image);
-    // sampling bg
-    int bg_rois_per_image = batch_size_per_im - fg_rois_per_this_image;
-    int bg_rois_this_image = static_cast<int>(bg_inds.size());
-    int bg_rois_per_this_image =
-        std::min(bg_rois_per_image, bg_rois_this_image);
-    if (use_random) {
-      const int64_t bg_size = static_cast<int64_t>(bg_inds.size());
-      if (bg_size > bg_rois_per_this_image) {
-        for (int64_t i = bg_rois_per_this_image; i < bg_size; ++i) {
-          int rng_ind = std::floor(uniform(engine) * i);  // NOLINT
-          if (rng_ind < fg_rois_per_this_image)
-            std::iter_swap(bg_inds.begin() + rng_ind, bg_inds.begin() + i);
-        }
-      }
-    }
-    std::vector<int> new_bg_inds(bg_inds.begin(),
-                                 bg_inds.begin() + bg_rois_per_this_image);
-    //
-    res.emplace_back(new_fg_inds);
-    res.emplace_back(new_bg_inds);
-    res.emplace_back(new_gt_inds);
-  }
-
-  return res;
-}
-
-template <typename T>
-void GatherBoxesLabels(const phi::CPUContext& context,
-                       const phi::DenseTensor& boxes,
-                       const phi::DenseTensor& max_overlap,
-                       const phi::DenseTensor& gt_boxes,
-                       const phi::DenseTensor& gt_classes,
-                       const std::vector<int>& fg_inds,
-                       const std::vector<int>& bg_inds,
-                       const std::vector<int>& gt_inds,
-                       phi::DenseTensor* sampled_boxes,
-                       phi::DenseTensor* sampled_labels,
-                       phi::DenseTensor* sampled_gts,
-                       phi::DenseTensor* sampled_max_overlap) {
-  int fg_num = static_cast<int>(fg_inds.size());
-  int bg_num = static_cast<int>(bg_inds.size());
-  phi::DenseTensor fg_inds_t, bg_inds_t, gt_box_inds_t, gt_label_inds_t;
-  int* fg_inds_data = fg_inds_t.mutable_data<int>({fg_num}, context.GetPlace());
-  int* bg_inds_data = bg_inds_t.mutable_data<int>({bg_num}, context.GetPlace());
-  int* gt_box_inds_data =
-      gt_box_inds_t.mutable_data<int>({fg_num}, context.GetPlace());
-  int* gt_label_inds_data =
-      gt_label_inds_t.mutable_data<int>({fg_num}, context.GetPlace());
-  std::copy(fg_inds.begin(), fg_inds.end(), fg_inds_data);
-  std::copy(bg_inds.begin(), bg_inds.end(), bg_inds_data);
-  std::copy(gt_inds.begin(), gt_inds.end(), gt_box_inds_data);
-  std::copy(gt_inds.begin(), gt_inds.end(), gt_label_inds_data);
-
-  phi::DenseTensor fg_boxes, bg_boxes, fg_labels, bg_labels;
-  fg_boxes.mutable_data<T>({fg_num, kBoxDim}, context.GetPlace());
-  phi::funcs::CPUGather<T>(context, boxes, fg_inds_t, &fg_boxes);
-  bg_boxes.mutable_data<T>({bg_num, kBoxDim}, context.GetPlace());
-  phi::funcs::CPUGather<T>(context, boxes, bg_inds_t, &bg_boxes);
-  Concat<T>(context, fg_boxes, bg_boxes, sampled_boxes);
-  phi::funcs::CPUGather<T>(context, gt_boxes, gt_box_inds_t, sampled_gts);
-  fg_labels.mutable_data<int>({fg_num}, context.GetPlace());
-  phi::funcs::CPUGather<int>(context, gt_classes, gt_label_inds_t, &fg_labels);
-  bg_labels.mutable_data<int>({bg_num}, context.GetPlace());
-  phi::funcs::set_constant(context, &bg_labels, static_cast<int>(0));
-  Concat<int>(context, fg_labels, bg_labels, sampled_labels);
-
-  phi::DenseTensor fg_max_overlap, bg_max_overlap;
-  fg_max_overlap.mutable_data<T>({fg_num}, context.GetPlace());
-  phi::funcs::CPUGather<T>(context, max_overlap, fg_inds_t, &fg_max_overlap);
-  bg_max_overlap.mutable_data<T>({bg_num}, context.GetPlace());
-  phi::funcs::CPUGather<T>(context, max_overlap, bg_inds_t, &bg_max_overlap);
-  Concat<T>(context, fg_max_overlap, bg_max_overlap, sampled_max_overlap);
-}
-
-template <typename T>
-std::vector<phi::DenseTensor> SampleRoisForOneImage(
-    const phi::CPUContext& context,
-    const phi::DenseTensor& rpn_rois_in,
-    const phi::DenseTensor& gt_classes,
-    const phi::DenseTensor& is_crowd,
-    const phi::DenseTensor& gt_boxes,
-    const phi::DenseTensor& im_info,
-    const int batch_size_per_im,
-    const float fg_fraction,
-    const float fg_thresh,
-    const float bg_thresh_hi,
-    const float bg_thresh_lo,
-    const std::vector<float>& bbox_reg_weights,
-    const int class_nums,
-    std::minstd_rand engine,
-    bool use_random,
-    bool is_cascade_rcnn,
-    bool is_cls_agnostic,
-    const phi::DenseTensor& max_overlap) {
-  // 1.1 map to original image
-  auto im_scale = im_info.data<T>()[2];
-  phi::DenseTensor rpn_rois;
-  rpn_rois.mutable_data<T>(rpn_rois_in.dims(), context.GetPlace());
-  const T* rpn_rois_in_dt = rpn_rois_in.data<T>();
-  T* rpn_rois_dt = rpn_rois.data<T>();
-
-  for (int i = 0; i < rpn_rois.numel(); ++i) {
-    rpn_rois_dt[i] = rpn_rois_in_dt[i] / im_scale;
-  }
-
-  int proposals_num = 1;
-
-  if (is_cascade_rcnn) {
-    phi::DenseTensor keep;
-    FilterRoIs<T>(context, rpn_rois, max_overlap, &keep);
-    phi::DenseTensor roi_filter;
-    // phi::DenseTensor box_filter;
-    if (keep.numel() == 0) {
-      phi::funcs::SetConstant<phi::CPUContext, T> set_zero;
-      roi_filter.mutable_data<T>({proposals_num, kBoxDim}, context.GetPlace());
-      set_zero(context, &roi_filter, static_cast<T>(0));
-    } else {
-      proposals_num = static_cast<int>(keep.numel());
-      roi_filter.mutable_data<T>({proposals_num, kBoxDim}, context.GetPlace());
-      phi::funcs::CPUGather<T>(context, rpn_rois, keep, &roi_filter);
-    }
-    T* roi_filter_dt = roi_filter.data<T>();
-    memcpy(rpn_rois_dt, roi_filter_dt, roi_filter.numel() * sizeof(T));
-    rpn_rois.Resize(roi_filter.dims());
-  } else {
-    proposals_num = static_cast<int>(rpn_rois.dims()[0]);
-  }
-  // 1.2 compute overlaps
-  proposals_num += static_cast<int>(gt_boxes.dims()[0]);
-
-  phi::DenseTensor proposal_to_gt_overlaps;
-  proposal_to_gt_overlaps.mutable_data<T>({proposals_num, gt_boxes.dims()[0]},
-                                          context.GetPlace());
-
-  phi::DenseTensor boxes;
-  boxes.mutable_data<T>({proposals_num, kBoxDim}, context.GetPlace());
-  Concat<T>(context, gt_boxes, rpn_rois, &boxes);
-  BboxOverlaps<T>(boxes, gt_boxes, &proposal_to_gt_overlaps);
-
-  phi::DenseTensor proposal_with_max_overlap;
-  proposal_with_max_overlap.mutable_data<T>({proposals_num},
-                                            context.GetPlace());
-
-  MaxIoU<T>(proposal_to_gt_overlaps, &proposal_with_max_overlap);
-
-  // Generate proposal index
-  std::vector<std::vector<int>> fg_bg_gt =
-      SampleFgBgGt<T>(context,
-                      &proposal_to_gt_overlaps,
-                      is_crowd,
-                      batch_size_per_im,
-                      fg_fraction,
-                      fg_thresh,
-                      bg_thresh_hi,
-                      bg_thresh_lo,
-                      engine,
-                      use_random,
-                      is_cascade_rcnn,
-                      boxes);
-  std::vector<int> fg_inds = fg_bg_gt[0];
-  std::vector<int> bg_inds = fg_bg_gt[1];
-  std::vector<int> mapped_gt_inds = fg_bg_gt[2];  // mapped_gt_labels
-
-  // Gather boxes and labels
-  phi::DenseTensor sampled_boxes, sampled_labels, sampled_gts,
-      sampled_max_overlap;
-  int fg_num = static_cast<int>(fg_inds.size());
-  int bg_num = static_cast<int>(bg_inds.size());
-  int boxes_num = fg_num + bg_num;
-  framework::DDim bbox_dim({boxes_num, kBoxDim});
-  sampled_boxes.mutable_data<T>(bbox_dim, context.GetPlace());
-  sampled_labels.mutable_data<int>({boxes_num}, context.GetPlace());
-  sampled_gts.mutable_data<T>({fg_num, kBoxDim}, context.GetPlace());
-  sampled_max_overlap.mutable_data<T>({boxes_num}, context.GetPlace());
-  GatherBoxesLabels<T>(context,
-                       boxes,
-                       proposal_with_max_overlap,
-                       gt_boxes,
-                       gt_classes,
-                       fg_inds,
-                       bg_inds,
-                       mapped_gt_inds,
-                       &sampled_boxes,
-                       &sampled_labels,
-                       &sampled_gts,
-                       &sampled_max_overlap);
-
-  // Compute targets
-  phi::DenseTensor bbox_targets_single;
-  bbox_targets_single.mutable_data<T>(bbox_dim, context.GetPlace());
-  BoxToDelta<T>(fg_num,
-                sampled_boxes,
-                sampled_gts,
-                bbox_reg_weights.data(),
-                false,
-                &bbox_targets_single);
-
-  // Scale rois
-  phi::DenseTensor sampled_rois;
-  sampled_rois.mutable_data<T>(sampled_boxes.dims(), context.GetPlace());
-  auto sampled_rois_et = framework::EigenTensor<T, 2>::From(sampled_rois);
-  auto sampled_boxes_et = framework::EigenTensor<T, 2>::From(sampled_boxes);
-  sampled_rois_et = sampled_boxes_et * im_scale;
-
-  // Expand box targets
-  phi::DenseTensor bbox_targets, bbox_inside_weights, bbox_outside_weights;
-  framework::DDim bbox_expand_dim({boxes_num, kBoxDim * class_nums});
-  bbox_targets.mutable_data<T>(bbox_expand_dim, context.GetPlace());
-  bbox_inside_weights.mutable_data<T>(bbox_expand_dim, context.GetPlace());
-  bbox_outside_weights.mutable_data<T>(bbox_expand_dim, context.GetPlace());
-  phi::funcs::set_constant(context, &bbox_targets, static_cast<T>(0.0));
-  phi::funcs::set_constant(context, &bbox_inside_weights, static_cast<T>(0.0));
-  phi::funcs::set_constant(context, &bbox_outside_weights, static_cast<T>(0.0));
-
-  auto* bbox_targets_single_data = bbox_targets_single.data<T>();
-  auto* sampled_labels_data = sampled_labels.data<int>();
-  auto* bbox_targets_data = bbox_targets.data<T>();
-  auto* bbox_inside_weights_data = bbox_inside_weights.data<T>();
-  auto* bbox_outside_weights_data = bbox_outside_weights.data<T>();
-  int width = kBoxDim * class_nums;
-  for (int64_t i = 0; i < boxes_num; ++i) {
-    int label = sampled_labels_data[i];
-    if (label > 0) {
-      if (is_cls_agnostic) {
-        label = 1;
-      }
-      int dst_idx = static_cast<int>(i * width + kBoxDim * label);
-      int src_idx = static_cast<int>(kBoxDim * i);
-      bbox_targets_data[dst_idx] = bbox_targets_single_data[src_idx];
-      bbox_targets_data[dst_idx + 1] = bbox_targets_single_data[src_idx + 1];
-      bbox_targets_data[dst_idx + 2] = bbox_targets_single_data[src_idx + 2];
-      bbox_targets_data[dst_idx + 3] = bbox_targets_single_data[src_idx + 3];
-      bbox_inside_weights_data[dst_idx] = 1;
-      bbox_inside_weights_data[dst_idx + 1] = 1;
-      bbox_inside_weights_data[dst_idx + 2] = 1;
-      bbox_inside_weights_data[dst_idx + 3] = 1;
-      bbox_outside_weights_data[dst_idx] = 1;
-      bbox_outside_weights_data[dst_idx + 1] = 1;
-      bbox_outside_weights_data[dst_idx + 2] = 1;
-      bbox_outside_weights_data[dst_idx + 3] = 1;
-    }
-  }
-  std::vector<phi::DenseTensor> res;
-  res.emplace_back(sampled_rois);
-  res.emplace_back(sampled_labels);
-  res.emplace_back(bbox_targets);
-  res.emplace_back(bbox_inside_weights);
-  res.emplace_back(bbox_outside_weights);
-  res.emplace_back(sampled_max_overlap);
-  return res;
-}
-
-template <typename T, typename DeviceContext>
-class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& context) const override {
-    auto* rpn_rois = context.Input<phi::DenseTensor>("RpnRois");
-    auto* gt_classes = context.Input<phi::DenseTensor>("GtClasses");
-    auto* is_crowd = context.Input<phi::DenseTensor>("IsCrowd");
-    auto* gt_boxes = context.Input<phi::DenseTensor>("GtBoxes");
-    auto* im_info = context.Input<phi::DenseTensor>("ImInfo");
-
-    auto* rois = context.Output<phi::DenseTensor>("Rois");
-    auto* labels_int32 = context.Output<phi::DenseTensor>("LabelsInt32");
-    auto* bbox_targets = context.Output<phi::DenseTensor>("BboxTargets");
-    auto* bbox_inside_weights =
-        context.Output<phi::DenseTensor>("BboxInsideWeights");
-    auto* bbox_outside_weights =
-        context.Output<phi::DenseTensor>("BboxOutsideWeights");
-    auto* max_overlap_with_gt =
-        context.Output<phi::DenseTensor>("MaxOverlapWithGT");
-
-    int batch_size_per_im = context.Attr<int>("batch_size_per_im");
-    float fg_fraction = context.Attr<float>("fg_fraction");
-    float fg_thresh = context.Attr<float>("fg_thresh");
-    float bg_thresh_hi = context.Attr<float>("bg_thresh_hi");
-    float bg_thresh_lo = context.Attr<float>("bg_thresh_lo");
-    std::vector<float> bbox_reg_weights =
-        context.Attr<std::vector<float>>("bbox_reg_weights");
-    int class_nums = context.Attr<int>("class_nums");
-    bool use_random = context.Attr<bool>("use_random");
-    bool is_cascade_rcnn = context.Attr<bool>("is_cascade_rcnn");
-    bool is_cls_agnostic = context.Attr<bool>("is_cls_agnostic");
-    PADDLE_ENFORCE_EQ(
-        rpn_rois->lod().size(),
-        1UL,
-        phi::errors::InvalidArgument(
-            "GenerateProposalLabelsOp rpn_rois needs 1 level of LoD. But "
-            "received level of LoD is [%d], LoD is [%s].",
-            rpn_rois->lod().size(),
-            rpn_rois->lod()));
-    PADDLE_ENFORCE_EQ(
-        gt_classes->lod().size(),
-        1UL,
-        phi::errors::InvalidArgument(
-            "GenerateProposalLabelsOp gt_classes needs 1 level of LoD. But "
-            "received level of LoD is [%d], LoD is [%s].",
-            gt_classes->lod().size(),
-            gt_classes->lod()));
-    PADDLE_ENFORCE_EQ(
-        is_crowd->lod().size(),
-        1UL,
-        phi::errors::InvalidArgument(
-            "GenerateProposalLabelsOp is_crowd needs 1 level of LoD. But "
-            "received level of LoD is [%d], LoD is [%s].",
-            is_crowd->lod().size(),
-            is_crowd->lod()));
-    PADDLE_ENFORCE_EQ(
-        gt_boxes->lod().size(),
-        1UL,
-        phi::errors::InvalidArgument(
-            "GenerateProposalLabelsOp gt_boxes needs 1 level of LoD. But "
-            "received level of LoD is [%d], LoD is [%s].",
-            gt_boxes->lod().size(),
-            gt_boxes->lod()));
-    int64_t n = static_cast<int64_t>(rpn_rois->lod().back().size() - 1);
-    int64_t rois_num = rpn_rois->dims()[0];
-    int64_t gts_num = gt_boxes->dims()[0];
-    int64_t init_num =
-        is_cascade_rcnn ? rois_num + gts_num : n * batch_size_per_im;
-
-    rois->mutable_data<T>({init_num, kBoxDim}, context.GetPlace());
-    labels_int32->mutable_data<int>({init_num, 1}, context.GetPlace());
-    bbox_targets->mutable_data<T>({init_num, kBoxDim * class_nums},
-                                  context.GetPlace());
-    bbox_inside_weights->mutable_data<T>({init_num, kBoxDim * class_nums},
-                                         context.GetPlace());
-    bbox_outside_weights->mutable_data<T>({init_num, kBoxDim * class_nums},
-                                          context.GetPlace());
-    max_overlap_with_gt->Resize({init_num});
-    max_overlap_with_gt->mutable_data<T>(context.GetPlace());
-
-    std::random_device rnd;
-    std::minstd_rand engine;
-    int seed = static_cast<int>(rnd());
-    engine.seed(seed);
-
-    framework::LoD lod;
-    std::vector<size_t> lod0(1, 0);
-
-    int64_t num_rois = 0;
-    auto& dev_ctx = context.device_context<phi::CPUContext>();
-
-    auto rpn_rois_lod = rpn_rois->lod().back();
-    auto gt_classes_lod = gt_classes->lod().back();
-    auto is_crowd_lod = is_crowd->lod().back();
-    auto gt_boxes_lod = gt_boxes->lod().back();
-    for (int i = 0; i < n; ++i) {
-      if (rpn_rois_lod[i] == rpn_rois_lod[i + 1]) {
-        lod0.emplace_back(num_rois);
-        continue;
-      }
-      phi::DenseTensor rpn_rois_slice =
-          rpn_rois->Slice(static_cast<int64_t>(rpn_rois_lod[i]),
-                          static_cast<int64_t>(rpn_rois_lod[i + 1]));
-      phi::DenseTensor gt_classes_slice =
-          gt_classes->Slice(static_cast<int64_t>(gt_classes_lod[i]),
-                            static_cast<int64_t>(gt_classes_lod[i + 1]));
-      phi::DenseTensor is_crowd_slice =
-          is_crowd->Slice(static_cast<int64_t>(is_crowd_lod[i]),
-                          static_cast<int64_t>(is_crowd_lod[i + 1]));
-      phi::DenseTensor gt_boxes_slice =
-          gt_boxes->Slice(static_cast<int64_t>(gt_boxes_lod[i]),
-                          static_cast<int64_t>(gt_boxes_lod[i + 1]));
-      phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1);
-      phi::DenseTensor max_overlap_slice;
-      if (is_cascade_rcnn) {
-        auto* max_overlap = context.Input<phi::DenseTensor>("MaxOverlap");
-        max_overlap_slice =
-            max_overlap->Slice(static_cast<int64_t>(rpn_rois_lod[i]),
-                               static_cast<int64_t>(rpn_rois_lod[i + 1]));
-      } else {
-        max_overlap_slice.mutable_data<T>({rpn_rois_slice.dims()[0]},
-                                          context.GetPlace());
-      }
-      std::vector<phi::DenseTensor> tensor_output =
-          SampleRoisForOneImage<T>(dev_ctx,
-                                   rpn_rois_slice,
-                                   gt_classes_slice,
-                                   is_crowd_slice,
-                                   gt_boxes_slice,
-                                   im_info_slice,
-                                   batch_size_per_im,
-                                   fg_fraction,
-                                   fg_thresh,
-                                   bg_thresh_hi,
-                                   bg_thresh_lo,
-                                   bbox_reg_weights,
-                                   class_nums,
-                                   engine,
-                                   use_random,
-                                   is_cascade_rcnn,
-                                   is_cls_agnostic,
-                                   max_overlap_slice);
-      phi::DenseTensor sampled_rois = tensor_output[0];
-      phi::DenseTensor sampled_labels_int32 = tensor_output[1];
-      phi::DenseTensor sampled_bbox_targets = tensor_output[2];
-      phi::DenseTensor sampled_bbox_inside_weights = tensor_output[3];
-      phi::DenseTensor sampled_bbox_outside_weights = tensor_output[4];
-      phi::DenseTensor sampled_max_overlap = tensor_output[5];
-
-      AppendRois<T>(rois, kBoxDim * num_rois, &sampled_rois);
-      AppendRois<int>(labels_int32, num_rois, &sampled_labels_int32);
-      int64_t offset = kBoxDim * num_rois * class_nums;
-      AppendRois<T>(bbox_targets, offset, &sampled_bbox_targets);
-      AppendRois<T>(bbox_inside_weights, offset, &sampled_bbox_inside_weights);
-      AppendRois<T>(
-          bbox_outside_weights, offset, &sampled_bbox_outside_weights);
-      AppendRois<T>(max_overlap_with_gt, num_rois, &sampled_max_overlap);
-
-      num_rois += sampled_rois.dims()[0];
-      lod0.emplace_back(num_rois);
-    }
-
-    lod.emplace_back(lod0);
-    rois->set_lod(lod);
-    labels_int32->set_lod(lod);
-    bbox_targets->set_lod(lod);
-    bbox_inside_weights->set_lod(lod);
-    bbox_outside_weights->set_lod(lod);
-    rois->Resize({num_rois, kBoxDim});
-    labels_int32->Resize({num_rois, 1});
-    bbox_targets->Resize({num_rois, kBoxDim * class_nums});
-    bbox_inside_weights->Resize({num_rois, kBoxDim * class_nums});
-    bbox_outside_weights->Resize({num_rois, kBoxDim * class_nums});
-    max_overlap_with_gt->Resize({num_rois});
-    max_overlap_with_gt->set_lod(lod);
-  }
-};
-
-class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
- public:
-  void Make() override {
-    AddInput(
-        "RpnRois",
-        "(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape "
-        "[N, 4]. "
-        "N is the number of the GenerateProposalOp's output, "
-        "each element is a bounding box with [xmin, ymin, xmax, ymax] format.");
-    AddInput("GtClasses",
-             "(phi::DenseTensor), This input is a 2D phi::DenseTensor with "
-             "shape [M, 1]. "
-             "M is the number of groundtruth, "
-             "each element is a class label of groundtruth.");
-    AddInput(
-        "IsCrowd",
-        "(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape "
-        "[M, 1]. "
-        "M is the number of groundtruth, "
-        "each element is a flag indicates whether a groundtruth is crowd.");
-    AddInput(
-        "GtBoxes",
-        "(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape "
-        "[M, 4]. "
-        "M is the number of groundtruth, "
-        "each element is a bounding box with [xmin, ymin, xmax, ymax] format.");
-    AddInput("ImInfo",
-             "(Tensor), This input is a 2D Tensor with shape [B, 3]. "
-             "B is the number of input images, "
-             "each element consists of im_height, im_width, im_scale.");
-    AddInput("MaxOverlap",
-             "(phi::DenseTensor), This input is a 1D phi::DenseTensor with "
-             "shape [N]."
-             "N is the number of Input(RpnRois), "
-             "each element is the maximum overlap between "
-             "the proposal RoI and ground-truth.")
-        .AsDispensable();
-
-    AddOutput(
-        "Rois",
-        "(phi::DenseTensor), This output is a 2D phi::DenseTensor with shape "
-        "[P, 4]. "
-        "P usuall equal to  batch_size_per_im * batch_size, "
-        "each element is a bounding box with [xmin, ymin, xmax, ymax] format.");
-    AddOutput("LabelsInt32",
-              "(phi::DenseTensor), This output is a 2D phi::DenseTensor with "
-              "shape [P, 1], "
-              "each element represents a class label of a roi");
-    AddOutput("BboxTargets",
-              "(phi::DenseTensor), This output is a 2D phi::DenseTensor with "
-              "shape [P, 4 * "
-              "class_nums], "
-              "each element represents a box label of a roi");
-    AddOutput(
-        "BboxInsideWeights",
-        "(phi::DenseTensor), This output is a 2D phi::DenseTensor with shape "
-        "[P, 4 * "
-        "class_nums], "
-        "each element indicates whether a box should contribute to loss.");
-    AddOutput(
-        "BboxOutsideWeights",
-        "(phi::DenseTensor), This output is a 2D phi::DenseTensor with shape "
-        "[P, 4 * "
-        "class_nums], "
-        "each element indicates whether a box should contribute to loss.");
-    AddOutput("MaxOverlapWithGT",
-              "(phi::DenseTensor), This output is a 1D phi::DenseTensor with "
-              "shape [P], "
-              "each element indicates the maxoverlap "
-              "between output RoIs and ground-truth. "
-              "The output RoIs may include ground-truth "
-              "and the output maxoverlap may contain 1.");
-
-    AddAttr<int>("batch_size_per_im", "Batch size of rois per images.");
-    AddAttr<float>("fg_fraction",
-                   "Foreground fraction in total batch_size_per_im.");
-    AddAttr<float>(
-        "fg_thresh",
-        "Overlap threshold which is used to chose foreground sample.");
-    AddAttr<float>("bg_thresh_hi",
-                   "Overlap threshold upper bound which is used to chose "
-                   "background sample.");
-    AddAttr<float>("bg_thresh_lo",
-                   "Overlap threshold lower bound which is used to chose "
-                   "background sample.");
-    AddAttr<std::vector<float>>("bbox_reg_weights", "Box regression weights.");
-    AddAttr<int>("class_nums", "Class number.");
-    AddAttr<bool>(
-        "use_random",
-        "Use random sampling to choose foreground and background boxes.")
-        .SetDefault(true);
-    AddAttr<bool>("is_cascade_rcnn",
-                  "cascade rcnn sampling policy changed from stage 2.")
-        .SetDefault(false);
-    AddAttr<bool>(
-        "is_cls_agnostic",
-        "the box regress will only include fg and bg locations if set true ")
-        .SetDefault(false);
-
-    AddComment(R"DOC(
-This operator can be, for given the GenerateProposalOp output bounding boxes and groundtruth,
-to sample foreground boxes and background boxes, and compute loss target.
-
-RpnRois is the output boxes of RPN and was processed by generate_proposal_op, these boxes
-were combined with groundtruth boxes and sampled according to batch_size_per_im and fg_fraction,
-If an instance with a groundtruth overlap greater than fg_thresh, then it was considered as a foreground sample.
-If an instance with a groundtruth overlap greater than bg_thresh_lo and lower than bg_thresh_hi,
-then it was considered as a background sample.
-After all foreground and background boxes are chosen (so called Rois),
-then we apply random sampling to make sure
-the number of foreground boxes is no more than batch_size_per_im * fg_fraction.
-
-For each box in Rois, we assign the classification (class label) and regression targets (box label) to it.
-Finally BboxInsideWeights and BboxOutsideWeights are used to specify whether it would contribute to training loss.
-    )DOC");
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-REGISTER_OPERATOR(
-    generate_proposal_labels,
-    ops::GenerateProposalLabelsOp,
-    ops::GenerateProposalLabelsOpMaker,
-    paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
-    paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
-PD_REGISTER_STRUCT_KERNEL(generate_proposal_labels,
-                          CPU,
-                          ALL_LAYOUT,
-                          ops::GenerateProposalLabelsKernel,
-                          float,
-                          double) {}
-
-REGISTER_OP_VERSION(generate_proposal_labels)
-    .AddCheckpoint(
-        R"ROC(
-              Upgrade of output [MaxOverlapWithGT])ROC",
-        paddle::framework::compatible::OpVersionDesc().NewOutput(
-            "MaxOverlapWithGT",
-            "The maxoverlap between output RoIs and ground-truth."))
-    .AddCheckpoint(
-        R"ROC(
-              Upgrade generate_proposal_labels add a new input [MaxOverlap])ROC",
-        paddle::framework::compatible::OpVersionDesc().NewInput(
-            "MaxOverlap", "MaxOverlap is dispensable."));
diff --git a/paddle/fluid/operators/detection/mask_util.cc b/paddle/fluid/operators/detection/mask_util.cc
deleted file mode 100644
index 5b4dc92f4f6af8..00000000000000
--- a/paddle/fluid/operators/detection/mask_util.cc
+++ /dev/null
@@ -1,242 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/operators/detection/mask_util.h"
-
-#include <cmath>
-#include <cstdlib>
-
-#include "paddle/fluid/memory/memory.h"
-
-namespace paddle {
-namespace operators {
-
-uint32_t UMax(uint32_t a, uint32_t b) { return (a > b) ? a : b; }
-
-static inline int Compare(const void* a, const void* b) {
-  uint32_t c = *(reinterpret_cast<const uint32_t*>(a));
-  uint32_t d = *(reinterpret_cast<const uint32_t*>(b));
-  return c > d ? 1 : c < d ? -1 : 0;
-}
-
-void Decode(const uint32_t* cnts, int m, uint8_t* mask) {
-  uint8_t v = 0;
-  for (int j = 0; j < m; j++) {
-    for (uint32_t k = 0; k < cnts[j]; k++) {
-      *(mask++) = v;
-    }
-    v = !v;
-  }
-}
-
-typedef uint32_t uint;
-void Poly2Mask(const float* xy, int k, int h, int w, uint8_t* mask) {
-  int j = 0, m = 0;
-  double scale = 5;
-  int *x = nullptr, *y = nullptr, *u = nullptr, *v = nullptr;
-  uint *a = nullptr, *b = nullptr;
-  platform::CPUPlace cpu;
-  auto xptr = memory::Alloc(cpu, sizeof(int) * (k + 1) * 2);
-  x = reinterpret_cast<int*>(xptr->ptr());
-  y = x + (k + 1);
-
-  for (j = 0; j < k; j++)
-    x[j] = static_cast<int>(std::lround(scale * xy[j * 2 + 0]));
-  x[k] = x[0];
-  for (j = 0; j < k; j++)
-    y[j] = static_cast<int>(std::lround(scale * xy[j * 2 + 1]));
-  y[k] = y[0];
-  for (j = 0; j < k; j++) {
-    m += static_cast<int>(UMax(abs(x[j] - x[j + 1]), abs(y[j] - y[j + 1])) + 1);
-  }
-  auto vptr = memory::Alloc(cpu, sizeof(int) * m * 2);
-  u = reinterpret_cast<int*>(vptr->ptr());
-  v = u + m;
-  m = 0;
-  for (j = 0; j < k; j++) {
-    int xs = x[j], xe = x[j + 1], ys = y[j], ye = y[j + 1], dx = 0, dy = 0,
-        t = 0, d = 0;
-    int flip = 0;
-    double s = NAN;
-    dx = abs(xe - xs);
-    dy = abs(ys - ye);
-    flip = (dx >= dy && xs > xe) || (dx < dy && ys > ye);
-    if (flip) {
-      t = xs;
-      xs = xe;
-      xe = t;
-      t = ys;
-      ys = ye;
-      ye = t;
-    }
-    if (dx >= dy) {
-      s = dx == 0 ? 0 : static_cast<double>(ye - ys) / dx;
-      for (d = 0; d <= dx; d++) {
-        t = flip ? dx - d : d;
-        u[m] = t + xs;
-        v[m] = static_cast<int>(std::lround(ys + s * t));
-        m++;
-      }
-    } else {
-      s = dy == 0 ? 0 : static_cast<double>(xe - xs) / dy;
-      for (d = 0; d <= dy; d++) {
-        t = flip ? dy - d : d;
-        v[m] = t + ys;
-        u[m] = static_cast<int>(std::lround(xs + s * t));
-        m++;
-      }
-    }
-  }
-  /* get points along y-boundary and downsample */
-  k = m;
-  m = 0;
-  double xd = NAN, yd = NAN;
-  auto xyptr = memory::Alloc(cpu, sizeof(int) * k * 2);
-  x = reinterpret_cast<int*>(xyptr->ptr());
-  y = x + k;
-  for (j = 1; j < k; j++) {
-    if (u[j] != u[j - 1]) {
-      xd = static_cast<double>(u[j] < u[j - 1] ? u[j] : u[j] - 1);
-      xd = (xd + .5) / scale - .5;
-      if (floor(xd) != xd || xd < 0 || xd > w - 1) continue;
-      yd = static_cast<double>(v[j] < v[j - 1] ? v[j] : v[j - 1]);
-      yd = (yd + .5) / scale - .5;
-      if (yd < 0)
-        yd = 0;
-      else if (yd > h)
-        yd = h;
-      yd = ceil(yd);
-      x[m] = static_cast<int>(xd);
-      y[m] = static_cast<int>(yd);
-      m++;
-    }
-  }
-  /* compute rle encoding given y-boundary points */
-  k = m;
-  auto aptr = memory::Alloc(cpu, sizeof(uint) * (k + 1));
-  a = reinterpret_cast<uint*>(aptr->ptr());
-  for (j = 0; j < k; j++) a[j] = static_cast<uint>(x[j] * h + y[j]);
-  a[k++] = static_cast<uint>(h * w);
-
-  qsort(a, k, sizeof(uint), Compare);
-  uint p = 0;
-  for (j = 0; j < k; j++) {
-    uint t = a[j];
-    a[j] -= p;
-    p = t;
-  }
-  auto bptr = memory::Alloc(cpu, sizeof(uint32_t) * k);
-  b = reinterpret_cast<uint32_t*>(bptr->ptr());
-  j = m = 0;
-  b[m++] = a[j++];
-  while (j < k) {
-    if (a[j] > 0) {
-      b[m++] = a[j++];
-    } else {
-      j++;
-      if (j < k) b[m - 1] += a[j++];
-    }
-  }
-
-  // convert to mask
-  auto mskptr = memory::Alloc(cpu, sizeof(uint8_t) * h * w);
-  uint8_t* msk = reinterpret_cast<uint8_t*>(mskptr->ptr());
-  Decode(b, m, msk);
-
-  for (int ii = 0; ii < h; ++ii) {
-    for (int jj = 0; jj < w; ++jj) {
-      mask[ii * w + jj] = msk[jj * h + ii];
-    }
-  }
-}
-
-void Poly2Boxes(const std::vector<std::vector<std::vector<float>>>& polys,
-                float* boxes) {
-  // lists
-  for (size_t i = 0; i < polys.size(); ++i) {
-    float x0 = std::numeric_limits<float>::max();
-    float x1 = std::numeric_limits<float>::min();
-    float y0 = std::numeric_limits<float>::max();
-    float y1 = std::numeric_limits<float>::min();
-    // each list may have more than one polys
-    for (const auto& item : polys[i]) {
-      for (size_t k = 0; k < item.size() / 2; ++k) {
-        x0 = std::min(x0, item[2 * k]);
-        x1 = std::max(x1, item[2 * k]);
-        y0 = std::min(y0, item[2 * k + 1]);
-        y1 = std::max(y1, item[2 * k + 1]);
-      }
-    }
-    boxes[i * 4] = x0;
-    boxes[i * 4 + 1] = y0;
-    boxes[i * 4 + 2] = x1;
-    boxes[i * 4 + 3] = y1;
-  }
-}
-
-void Polys2MaskWrtBox(const std::vector<std::vector<float>>& polygons,
-                      const float* box,
-                      int M,
-                      uint8_t* mask) {
-  float w = box[2] - box[0];
-  float h = box[3] - box[1];
-  w = std::max(w, static_cast<float>(1.));
-  h = std::max(h, static_cast<float>(1.));
-
-  // short-circuit for case "polygons.size() == 1"
-  if (polygons.size() == 1UL) {
-    int k = static_cast<int>(polygons[0].size() / 2);
-    std::vector<float> p;
-    for (int j = 0; j < k; ++j) {
-      float pw = (polygons[0][2 * j] - box[0]) * M / w;      // NOLINT
-      float ph = (polygons[0][2 * j + 1] - box[1]) * M / h;  // NOLINT
-      p.push_back(pw);
-      p.push_back(ph);
-    }
-    Poly2Mask(p.data(), k, M, M, mask);
-
-    return;
-  }
-
-  uint8_t* msk = reinterpret_cast<uint8_t*>(
-      malloc(M * M * polygons.size() * sizeof(uint8_t)));  // NOLINT
-
-  for (size_t i = 0; i < polygons.size(); ++i) {
-    int k = static_cast<int>(polygons[i].size() / 2);
-    std::vector<float> p;
-    for (int j = 0; j < k; ++j) {
-      float pw = (polygons[i][2 * j] - box[0]) * M / w;      // NOLINT
-      float ph = (polygons[i][2 * j + 1] - box[1]) * M / h;  // NOLINT
-      p.push_back(pw);
-      p.push_back(ph);
-    }
-    uint8_t* msk_i = msk + i * M * M;
-    Poly2Mask(p.data(), k, M, M, msk_i);
-  }
-
-  for (size_t i = 0; i < polygons.size(); ++i) {
-    uint8_t* msk_i = msk + i * M * M;
-    for (int j = 0; j < M * M; ++j) {
-      if (i == 0) {
-        mask[j] = msk_i[j];
-      } else {
-        mask[j] = (mask[j] + msk_i[j]) > 0 ? 1 : 0;
-      }
-    }
-  }
-  free(msk);  // NOLINT
-}
-
-}  // namespace operators
-}  // namespace paddle
diff --git a/paddle/fluid/operators/detection/mask_util.h b/paddle/fluid/operators/detection/mask_util.h
deleted file mode 100644
index 587a9c53794def..00000000000000
--- a/paddle/fluid/operators/detection/mask_util.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-#include <stdint.h>
-
-#include <vector>
-
-#include "paddle/utils/test_macros.h"
-
-namespace paddle {
-namespace operators {
-
-TEST_API void Poly2Mask(const float* ploy, int k, int h, int w, uint8_t* mask);
-
-TEST_API void Poly2Boxes(
-    const std::vector<std::vector<std::vector<float>>>& polys, float* boxes);
-
-TEST_API void Polys2MaskWrtBox(const std::vector<std::vector<float>>& polygons,
-                               const float* box,
-                               int M,
-                               uint8_t* mask);
-}  // namespace operators
-}  // namespace paddle
diff --git a/test/cpp/fluid/CMakeLists.txt b/test/cpp/fluid/CMakeLists.txt
index 76aa8a6635225f..d07156f16d57c8 100644
--- a/test/cpp/fluid/CMakeLists.txt
+++ b/test/cpp/fluid/CMakeLists.txt
@@ -7,7 +7,7 @@ if(WITH_CINN)
   add_subdirectory(cinn)
 endif()
 add_subdirectory(controlflow)
-add_subdirectory(detection)
+
 if(WITH_DLNNE)
   add_subdirectory(dlnne)
 endif()
diff --git a/test/cpp/fluid/detection/CMakeLists.txt b/test/cpp/fluid/detection/CMakeLists.txt
deleted file mode 100644
index 6a69241e7846ef..00000000000000
--- a/test/cpp/fluid/detection/CMakeLists.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-paddle_test(mask_util_test SRCS mask_util_test.cc)
-
-if(WITH_ONNXRUNTIME AND WIN32)
-  # Copy onnxruntime for some c++ test in Windows, since the test will
-  # be build only in CI, so suppose the generator in Windows is Ninja.
-  copy_onnx(mask_util_test)
-endif()
diff --git a/test/cpp/fluid/detection/mask_util_test.cc b/test/cpp/fluid/detection/mask_util_test.cc
deleted file mode 100644
index 274850c0a67dcd..00000000000000
--- a/test/cpp/fluid/detection/mask_util_test.cc
+++ /dev/null
@@ -1,126 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/operators/detection/mask_util.h"
-
-#include <gtest/gtest.h>
-
-#include "paddle/fluid/memory/memory.h"
-
-namespace paddle {
-namespace operators {
-
-template <typename T>
-void Compare(const T* a, const T* b, const int n) {
-  for (int i = 0; i < n; i++) {
-    EXPECT_EQ(a[i], b[i]);
-  }
-}
-
-TEST(MaskUtil, Poly2MaskTest) {
-  float polys[] = {// NOLINT
-                   1.97f,
-                   1.88f,
-                   5.81f,
-                   1.88f,
-                   1.69f,
-                   6.53f,
-                   5.94f,
-                   6.38f,
-                   1.97f,
-                   1.88f};
-  int h = 8, w = 8;
-  int k = 5;  // length(polys) / 2
-  // clang-format off
-  uint8_t expect_mask[] = { // NOLINT
-      0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 1, 1, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 1, 0, 0, 0, 0,
-      0, 0, 1, 1, 1, 0, 0, 0,
-      0, 0, 1, 1, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0
-  };
-  // clang-format on
-
-  // the ground-truth mask is computed by coco API:
-  //
-  // import pycocotools.mask as mask_util
-  // import numpy as np
-  // segm = [1.97, 1.88, 5.81, 1.88, 1.69, 6.53, 5.94, 6.38, 1.97, 1.88]
-  // rles = mask_util.frPyObjects([segm], im_h, im_w)
-  // mask = mask_util.decode(rles)
-  // print mask
-  platform::CPUPlace cpu;
-  auto allocation = memory::Alloc(cpu, sizeof(expect_mask));
-  uint8_t* mask = reinterpret_cast<uint8_t*>(allocation->ptr());
-  Poly2Mask(polys, k, h, w, mask);
-  Compare<uint8_t>(expect_mask, mask, h * w);
-}
-
-TEST(MaskUtil, Poly2BoxesTest) {
-  // clang-format off
-  std::vector<std::vector<std::vector<float>>> polys = {
-      {{1.97f, 1.88f, 5.81f, 1.88f, 1.69f, 6.53f, 5.94f, 6.38f, 1.97f, 1.88f}},
-      {{2.97f, 1.88f, 3.81f, 1.68f, 1.69f, 6.63f, 6.94f, 6.58f, 2.97f, 0.88f}}
-  };
-  float expect_boxes[] = { // NOLINT
-      1.69f, 1.88f, 5.94f, 6.53f,
-      1.69f, 0.88f, 6.94f, 6.63f
-  };
-  // clang-format on
-
-  platform::CPUPlace cpu;
-  auto allocation = memory::Alloc(cpu, sizeof(expect_boxes));
-  float* boxes = reinterpret_cast<float*>(allocation->ptr());
-  Poly2Boxes(polys, boxes);
-  Compare<float>(expect_boxes, boxes, 8);
-}
-
-TEST(MaskUtil, Polys2MaskWrtBoxTest) {
-  // clang-format off
-  std::vector<std::vector<std::vector<float>>> polys = {{
-      {1.97f, 1.88f, 5.81f, 1.88f, 1.69f, 6.53f, 5.94f, 6.38f, 1.97f, 1.88f},
-      {2.97f, 1.88f, 3.81f, 1.68f, 1.69f, 6.63f, 6.94f, 6.58f, 2.97f, 0.88f}}};
-  float expect_boxes[] = { // NOLINT
-      1.69f, 0.88f, 6.94f, 6.63f
-  };
-  uint8_t expect_mask[] = { // NOLINT
-      0, 0, 0, 0, 0, 0, 0, 0,
-      0, 1, 1, 1, 1, 1, 0, 0,
-      0, 0, 1, 1, 1, 0, 0, 0,
-      0, 0, 1, 1, 1, 0, 0, 0,
-      0, 0, 1, 1, 1, 0, 0, 0,
-      0, 1, 1, 1, 1, 1, 0, 0,
-      0, 1, 1, 1, 1, 1, 1, 0,
-      1, 1, 1, 1, 1, 1, 1, 1
-  };
-  // clang-format on
-
-  platform::CPUPlace cpu;
-  auto allocation = memory::Alloc(cpu, sizeof(expect_boxes));
-  float* boxes = reinterpret_cast<float*>(allocation->ptr());
-  Poly2Boxes(polys, boxes);
-  Compare<float>(expect_boxes, boxes, 4);
-
-  auto allocation_mask = memory::Alloc(cpu, sizeof(expect_mask));
-  uint8_t* mask = reinterpret_cast<uint8_t*>(allocation_mask->ptr());
-  int M = 8;
-  Polys2MaskWrtBox(polys[0], expect_boxes, M, mask);
-  Compare<uint8_t>(expect_mask, mask, M * M);
-}
-
-}  // namespace operators
-}  // namespace paddle
diff --git a/test/legacy_test/test_box_decoder_and_assign_op.py b/test/legacy_test/test_box_decoder_and_assign_op.py
deleted file mode 100644
index 555e5fbd2c6f7a..00000000000000
--- a/test/legacy_test/test_box_decoder_and_assign_op.py
+++ /dev/null
@@ -1,95 +0,0 @@
-#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import unittest
-
-import numpy as np
-from op_test import OpTest
-
-
-def box_decoder_and_assign(deltas, weights, boxes, box_score, box_clip):
-    boxes = boxes.astype(deltas.dtype, copy=False)
-    widths = boxes[:, 2] - boxes[:, 0] + 1.0
-    heights = boxes[:, 3] - boxes[:, 1] + 1.0
-    ctr_x = boxes[:, 0] + 0.5 * widths
-    ctr_y = boxes[:, 1] + 0.5 * heights
-    wx, wy, ww, wh = weights
-    dx = deltas[:, 0::4] * wx
-    dy = deltas[:, 1::4] * wy
-    dw = deltas[:, 2::4] * ww
-    dh = deltas[:, 3::4] * wh
-    # Prevent sending too large values into np.exp()
-    dw = np.minimum(dw, box_clip)
-    dh = np.minimum(dh, box_clip)
-    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
-    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
-    pred_w = np.exp(dw) * widths[:, np.newaxis]
-    pred_h = np.exp(dh) * heights[:, np.newaxis]
-    pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
-    # x1
-    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
-    # y1
-    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
-    # x2 (note: "- 1" is correct; don't be fooled by the asymmetry)
-    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1
-    # y2 (note: "- 1" is correct; don't be fooled by the asymmetry)
-    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1
-
-    output_assign_box = []
-    for ino in range(len(pred_boxes)):
-        rank = np.argsort(-box_score[ino])
-        maxidx = rank[0]
-        if maxidx == 0:
-            maxidx = rank[1]
-        beg_pos = maxidx * 4
-        end_pos = maxidx * 4 + 4
-        output_assign_box.append(pred_boxes[ino, beg_pos:end_pos])
-    output_assign_box = np.array(output_assign_box)
-
-    return pred_boxes, output_assign_box
-
-
-class TestBoxDecoderAndAssignOpWithLoD(OpTest):
-    def test_check_output(self):
-        # NODE(yjjiang11): This op will be deprecated.
-        self.check_output(check_dygraph=False)
-
-    def setUp(self):
-        self.op_type = "box_decoder_and_assign"
-        lod = [[4, 8, 8]]
-        num_classes = 10
-        prior_box = np.random.random((20, 4)).astype('float32')
-        prior_box_var = np.array([0.1, 0.1, 0.2, 0.2], dtype=np.float32)
-        target_box = np.random.random((20, 4 * num_classes)).astype('float32')
-        box_score = np.random.random((20, num_classes)).astype('float32')
-        box_clip = 4.135
-        output_box, output_assign_box = box_decoder_and_assign(
-            target_box, prior_box_var, prior_box, box_score, box_clip
-        )
-
-        self.inputs = {
-            'PriorBox': (prior_box, lod),
-            'PriorBoxVar': prior_box_var,
-            'TargetBox': (target_box, lod),
-            'BoxScore': (box_score, lod),
-        }
-        self.attrs = {'box_clip': box_clip}
-        self.outputs = {
-            'DecodeBox': output_box,
-            'OutputAssignBox': output_assign_box,
-        }
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/test/legacy_test/test_density_prior_box_op.py b/test/legacy_test/test_density_prior_box_op.py
deleted file mode 100644
index 9d621dc551111f..00000000000000
--- a/test/legacy_test/test_density_prior_box_op.py
+++ /dev/null
@@ -1,173 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import math
-import unittest
-
-import numpy as np
-from op_test import OpTest
-
-
-class TestDensityPriorBoxOp(OpTest):
-    def set_data(self):
-        self.init_test_params()
-        self.init_test_input()
-        self.init_test_output()
-        self.inputs = {'Input': self.input, 'Image': self.image}
-
-        self.attrs = {
-            'variances': self.variances,
-            'clip': self.clip,
-            'step_w': self.step_w,
-            'step_h': self.step_h,
-            'offset': self.offset,
-            'densities': self.densities,
-            'fixed_sizes': self.fixed_sizes,
-            'fixed_ratios': self.fixed_ratios,
-            'flatten_to_2d': self.flatten_to_2d,
-        }
-        self.outputs = {'Boxes': self.out_boxes, 'Variances': self.out_var}
-
-    def test_check_output(self):
-        self.check_output()
-
-    def setUp(self):
-        self.op_type = "density_prior_box"
-        self.set_data()
-
-    def set_density(self):
-        self.densities = [4, 2, 1]
-        self.fixed_sizes = [32.0, 64.0, 128.0]
-        self.fixed_ratios = [1.0]
-        self.layer_w = 17
-        self.layer_h = 17
-        self.image_w = 533
-        self.image_h = 533
-        self.flatten_to_2d = False
-
-    def init_test_params(self):
-        self.set_density()
-
-        self.step_w = float(self.image_w) / float(self.layer_w)
-        self.step_h = float(self.image_h) / float(self.layer_h)
-
-        self.input_channels = 2
-        self.image_channels = 3
-        self.batch_size = 10
-
-        self.variances = [0.1, 0.1, 0.2, 0.2]
-        self.variances = np.array(self.variances, dtype=np.float64).flatten()
-
-        self.clip = True
-        self.num_priors = 0
-        if len(self.fixed_sizes) > 0 and len(self.densities) > 0:
-            for density in self.densities:
-                if len(self.fixed_ratios) > 0:
-                    self.num_priors += len(self.fixed_ratios) * (
-                        pow(density, 2)
-                    )
-        self.offset = 0.5
-
-    def init_test_input(self):
-        self.image = np.random.random(
-            (self.batch_size, self.image_channels, self.image_w, self.image_h)
-        ).astype('float32')
-
-        self.input = np.random.random(
-            (self.batch_size, self.input_channels, self.layer_w, self.layer_h)
-        ).astype('float32')
-
-    def init_test_output(self):
-        out_dim = (self.layer_h, self.layer_w, self.num_priors, 4)
-        out_boxes = np.zeros(out_dim).astype('float32')
-        out_var = np.zeros(out_dim).astype('float32')
-
-        step_average = int((self.step_w + self.step_h) * 0.5)
-        for h in range(self.layer_h):
-            for w in range(self.layer_w):
-                idx = 0
-                c_x = (w + self.offset) * self.step_w
-                c_y = (h + self.offset) * self.step_h
-                # Generate density prior boxes with fixed size
-                for density, fixed_size in zip(
-                    self.densities, self.fixed_sizes
-                ):
-                    if len(self.fixed_ratios) > 0:
-                        for ar in self.fixed_ratios:
-                            shift = int(step_average / density)
-                            box_width_ratio = fixed_size * math.sqrt(ar)
-                            box_height_ratio = fixed_size / math.sqrt(ar)
-                            for di in range(density):
-                                for dj in range(density):
-                                    c_x_temp = (
-                                        c_x
-                                        - step_average / 2.0
-                                        + shift / 2.0
-                                        + dj * shift
-                                    )
-                                    c_y_temp = (
-                                        c_y
-                                        - step_average / 2.0
-                                        + shift / 2.0
-                                        + di * shift
-                                    )
-                                    out_boxes[h, w, idx, :] = [
-                                        max(
-                                            (c_x_temp - box_width_ratio / 2.0)
-                                            / self.image_w,
-                                            0,
-                                        ),
-                                        max(
-                                            (c_y_temp - box_height_ratio / 2.0)
-                                            / self.image_h,
-                                            0,
-                                        ),
-                                        min(
-                                            (c_x_temp + box_width_ratio / 2.0)
-                                            / self.image_w,
-                                            1,
-                                        ),
-                                        min(
-                                            (c_y_temp + box_height_ratio / 2.0)
-                                            / self.image_h,
-                                            1,
-                                        ),
-                                    ]
-                                    idx += 1
-        if self.clip:
-            out_boxes = np.clip(out_boxes, 0.0, 1.0)
-        out_var = np.tile(
-            self.variances, (self.layer_h, self.layer_w, self.num_priors, 1)
-        )
-        self.out_boxes = out_boxes.astype('float32')
-        self.out_var = out_var.astype('float32')
-        if self.flatten_to_2d:
-            self.out_boxes = self.out_boxes.reshape((-1, 4))
-            self.out_var = self.out_var.reshape((-1, 4))
-
-
-class TestDensityPriorBox(TestDensityPriorBoxOp):
-    def set_density(self):
-        self.densities = [3, 4]
-        self.fixed_sizes = [1.0, 2.0]
-        self.fixed_ratios = [1.0]
-        self.layer_w = 32
-        self.layer_h = 32
-        self.image_w = 40
-        self.image_h = 40
-        self.flatten_to_2d = True
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/test/legacy_test/test_generate_mask_labels_op.py b/test/legacy_test/test_generate_mask_labels_op.py
deleted file mode 100644
index 86ab3cb0888793..00000000000000
--- a/test/legacy_test/test_generate_mask_labels_op.py
+++ /dev/null
@@ -1,317 +0,0 @@
-#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import math
-import unittest
-
-import numpy as np
-
-'''
-# Equivalent code
-rles = mask_util.frPyObjects([segm], im_h, im_w)
-mask = mask_util.decode(rles)
-'''
-
-
-def decode(cnts, m):
-    v = 0
-    mask = []
-    for j in range(m):
-        for k in range(cnts[j]):
-            mask.append(v)
-        v = 1 - v
-    return mask
-
-
-def poly2mask(xy, k, h, w):
-    scale = 5.0
-    x = [int(scale * p + 0.5) for p in xy[::2]]
-    x = x + [x[0]]
-    y = [int(scale * p + 0.5) for p in xy[1::2]]
-    y = y + [y[0]]
-    m = sum(
-        [
-            int(max(abs(x[j] - x[j + 1]), abs(y[j] - y[j + 1]))) + 1
-            for j in range(k)
-        ]
-    )
-
-    u, v = [], []
-    for j in range(k):
-        xs = x[j]
-        xe = x[j + 1]
-        ys = y[j]
-        ye = y[j + 1]
-        dx = abs(xe - xs)
-        dy = abs(ys - ye)
-        flip = (dx >= dy and xs > xe) or (dx < dy and ys > ye)
-        if flip:
-            xs, xe = xe, xs
-            ys, ye = ye, ys
-
-        if dx >= dy:
-            if dx == 0:
-                assert ye - ys == 0
-            s = 0 if dx == 0 else float(ye - ys) / dx
-        else:
-            if dy == 0:
-                assert xe - xs == 0
-            s = 0 if dy == 0 else float(xe - xs) / dy
-
-        if dx >= dy:
-            ts = [dx - d if flip else d for d in range(dx + 1)]
-            u.extend([xs + t for t in ts])
-            v.extend([int(ys + s * t + 0.5) for t in ts])
-        else:
-            ts = [dy - d if flip else d for d in range(dy + 1)]
-            v.extend([t + ys for t in ts])
-            u.extend([int(xs + s * t + 0.5) for t in ts])
-
-    k = len(u)
-    x = np.zeros((k), np.int_)
-    y = np.zeros((k), np.int_)
-    m = 0
-    for j in range(1, k):
-        if u[j] != u[j - 1]:
-            xd = float(u[j] if (u[j] < u[j - 1]) else (u[j] - 1))
-            xd = (xd + 0.5) / scale - 0.5
-            if math.floor(xd) != xd or xd < 0 or xd > (w - 1):
-                continue
-            yd = float(v[j] if v[j] < v[j - 1] else v[j - 1])
-            yd = (yd + 0.5) / scale - 0.5
-            yd = math.ceil(0 if yd < 0 else (h if yd > h else yd))
-            x[m] = int(xd)
-            y[m] = int(yd)
-            m += 1
-    k = m
-    a = [int(x[i] * h + y[i]) for i in range(k)]
-    a.append(h * w)
-    a.sort()
-    b = [0] + a[: len(a) - 1]
-    a = [c - d for (c, d) in zip(a, b)]
-
-    k += 1
-    b = [0 for i in range(k)]
-    b[0] = a[0]
-    m, j = 1, 1
-    while j < k:
-        if a[j] > 0:
-            b[m] = a[j]
-            m += 1
-            j += 1
-        else:
-            j += 1
-            if j < k:
-                b[m - 1] += a[j]
-                j += 1
-    mask = decode(b, m)
-    mask = np.array(mask, dtype=np.int_).reshape((w, h))
-    mask = mask.transpose((1, 0))
-    return mask
-
-
-def polys_to_boxes(polys):
-    """Convert a list of polygons into an array of tight bounding boxes."""
-    boxes_from_polys = np.zeros((len(polys), 4), dtype=np.float32)
-    for i in range(len(polys)):
-        poly = polys[i]
-        x0 = min(min(p[::2]) for p in poly)
-        x1 = max(max(p[::2]) for p in poly)
-        y0 = min(min(p[1::2]) for p in poly)
-        y1 = max(max(p[1::2]) for p in poly)
-        boxes_from_polys[i, :] = [x0, y0, x1, y1]
-    return boxes_from_polys
-
-
-def bbox_overlaps(boxes, query_boxes):
-    N = boxes.shape[0]
-    K = query_boxes.shape[0]
-    overlaps = np.zeros((N, K), dtype=boxes.dtype)
-    for k in range(K):
-        box_area = (query_boxes[k, 2] - query_boxes[k, 0] + 1) * (
-            query_boxes[k, 3] - query_boxes[k, 1] + 1
-        )
-        for n in range(N):
-            iw = (
-                min(boxes[n, 2], query_boxes[k, 2])
-                - max(boxes[n, 0], query_boxes[k, 0])
-                + 1
-            )
-            if iw > 0:
-                ih = (
-                    min(boxes[n, 3], query_boxes[k, 3])
-                    - max(boxes[n, 1], query_boxes[k, 1])
-                    + 1
-                )
-                if ih > 0:
-                    ua = float(
-                        (boxes[n, 2] - boxes[n, 0] + 1)
-                        * (boxes[n, 3] - boxes[n, 1] + 1)
-                        + box_area
-                        - iw * ih
-                    )
-                    overlaps[n, k] = iw * ih / ua
-    return overlaps
-
-
-def polys_to_mask_wrt_box(polygons, box, M):
-    """Convert from the COCO polygon segmentation format to a binary mask
-    encoded as a 2D array of data type numpy.float32. The polygon segmentation
-    is understood to be enclosed in the given box and rasterized to an M x M
-    mask. The resulting mask is therefore of shape (M, M).
-    """
-    w = box[2] - box[0]
-    h = box[3] - box[1]
-
-    w = np.maximum(w, 1)
-    h = np.maximum(h, 1)
-
-    polygons_norm = []
-    for poly in polygons:
-        p = np.array(poly, dtype=np.float32)
-        p[0::2] = (p[0::2] - box[0]) * M / w
-        p[1::2] = (p[1::2] - box[1]) * M / h
-        polygons_norm.append(p)
-
-    mask = []
-    for polygons in polygons_norm:
-        assert polygons.shape[0] % 2 == 0
-        k = polygons.shape[0] // 2
-        mask.append(poly2mask(polygons, k, M, M))
-    mask = np.array(mask)
-    # Flatten in case polygons was a list
-    mask = np.sum(mask, axis=0)
-    mask = np.array(mask > 0, dtype=np.float32)
-    return mask
-
-
-def expand_mask_targets(masks, mask_class_labels, resolution, num_classes):
-    """Expand masks from shape (#masks, resolution ** 2)
-    to (#masks, #classes * resolution ** 2) to encode class
-    specific mask targets.
-    """
-    assert masks.shape[0] == mask_class_labels.shape[0]
-
-    # Target values of -1 are "don't care" / ignore labels
-    mask_targets = -np.ones(
-        (masks.shape[0], num_classes * resolution**2), dtype=np.int32
-    )
-    for i in range(masks.shape[0]):
-        cls = int(mask_class_labels[i])
-        start = resolution**2 * cls
-        end = start + resolution**2
-        # Ignore background instance
-        # (only happens when there is no fg samples in an image)
-        if cls > 0:
-            mask_targets[i, start:end] = masks[i, :]
-    return mask_targets
-
-
-def generate_mask_labels(
-    num_classes,
-    im_info,
-    gt_classes,
-    is_crowd,
-    label_int32,
-    gt_polys,
-    resolution,
-    rois,
-    roi_lod,
-    gt_lod,
-):
-    mask_rois = []
-    roi_has_mask_int32 = []
-    mask_int32 = []
-    new_lod = []
-    for i in range(len(im_info)):
-        roi_s = roi_lod[i]
-        roi_e = roi_lod[i + 1]
-        gt_s = gt_lod[i]
-        gt_e = gt_lod[i + 1]
-        mask_blob = _sample_mask(
-            num_classes,
-            im_info[i],
-            gt_classes[gt_s:gt_e],
-            is_crowd[gt_s:gt_e],
-            label_int32[roi_s:roi_e],
-            gt_polys[i],
-            resolution,
-            rois[roi_s:roi_e],
-        )
-        new_lod.append(mask_blob['mask_rois'].shape[0])
-        mask_rois.append(mask_blob['mask_rois'])
-        roi_has_mask_int32.append(mask_blob['roi_has_mask_int32'])
-        mask_int32.append(mask_blob['mask_int32'])
-    return mask_rois, roi_has_mask_int32, mask_int32, new_lod
-
-
-def _sample_mask(
-    num_classes,
-    im_info,
-    gt_classes,
-    is_crowd,
-    label_int32,
-    gt_polys,  # [[[], []], []]
-    resolution,
-    rois,
-):
-    mask_blob = {}
-    im_scale = im_info[2]
-    sample_boxes = rois
-    polys_gt_inds = np.where((gt_classes > 0) & (is_crowd == 0))[0]
-    polys_gt = [gt_polys[i] for i in polys_gt_inds]
-    boxes_from_polys = polys_to_boxes(polys_gt)
-
-    fg_inds = np.where(label_int32 > 0)[0]
-    roi_has_mask = fg_inds.copy()
-    if fg_inds.shape[0] > 0:
-        mask_class_labels = label_int32[fg_inds]
-        masks = np.zeros((fg_inds.shape[0], resolution**2), dtype=np.int32)
-        rois_fg = sample_boxes[fg_inds]
-        overlaps_bbfg_bbpolys = bbox_overlaps(
-            rois_fg.astype(np.float32), boxes_from_polys.astype(np.float32)
-        )
-        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)
-        for i in range(rois_fg.shape[0]):
-            fg_polys_ind = fg_polys_inds[i]
-            poly_gt = polys_gt[fg_polys_ind]
-            roi_fg = rois_fg[i]
-            mask = polys_to_mask_wrt_box(poly_gt, roi_fg, resolution)
-            mask = np.array(mask > 0, dtype=np.int32)
-            masks[i, :] = np.reshape(mask, resolution**2)
-    else:
-        bg_inds = np.where(label_int32 == 0)[0]
-        rois_fg = sample_boxes[bg_inds[0]].reshape((1, -1))
-        masks = -np.ones((1, resolution**2), dtype=np.int32)
-        mask_class_labels = np.zeros((1,))
-        roi_has_mask = np.append(roi_has_mask, 0)
-    masks = expand_mask_targets(
-        masks, mask_class_labels, resolution, num_classes
-    )
-    rois_fg *= im_scale
-    mask_blob['mask_rois'] = rois_fg
-    mask_blob['roi_has_mask_int32'] = roi_has_mask
-    mask_blob['mask_int32'] = masks
-    return mask_blob
-
-
-def trans_lod(lod):
-    new_lod = [0]
-    for i in range(len(lod)):
-        new_lod.append(lod[i] + new_lod[i])
-    return new_lod
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/test/legacy_test/test_generate_proposal_labels_op.py b/test/legacy_test/test_generate_proposal_labels_op.py
deleted file mode 100644
index 903201b9856a7c..00000000000000
--- a/test/legacy_test/test_generate_proposal_labels_op.py
+++ /dev/null
@@ -1,553 +0,0 @@
-#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import unittest
-
-import numpy as np
-from op_test import OpTest
-
-
-def generate_proposal_labels_in_python(
-    rpn_rois,
-    gt_classes,
-    is_crowd,
-    gt_boxes,
-    im_info,
-    batch_size_per_im,
-    fg_fraction,
-    fg_thresh,
-    bg_thresh_hi,
-    bg_thresh_lo,
-    bbox_reg_weights,
-    class_nums,
-    use_random,
-    is_cls_agnostic,
-    is_cascade_rcnn,
-    max_overlaps=None,
-):
-    rois = []
-    labels_int32 = []
-    bbox_targets = []
-    bbox_inside_weights = []
-    bbox_outside_weights = []
-    max_overlap_with_gt = []
-    lod = []
-    assert len(rpn_rois) == len(
-        im_info
-    ), 'batch size of rpn_rois and ground_truth is not matched'
-
-    for im_i in range(len(im_info)):
-        max_overlap = max_overlaps[im_i] if is_cascade_rcnn else None
-        frcn_blobs = _sample_rois(
-            rpn_rois[im_i],
-            gt_classes[im_i],
-            is_crowd[im_i],
-            gt_boxes[im_i],
-            im_info[im_i],
-            batch_size_per_im,
-            fg_fraction,
-            fg_thresh,
-            bg_thresh_hi,
-            bg_thresh_lo,
-            bbox_reg_weights,
-            class_nums,
-            use_random,
-            is_cls_agnostic,
-            is_cascade_rcnn,
-            max_overlap,
-        )
-        lod.append(frcn_blobs['rois'].shape[0])
-        rois.append(frcn_blobs['rois'])
-        labels_int32.append(frcn_blobs['labels_int32'])
-        bbox_targets.append(frcn_blobs['bbox_targets'])
-        bbox_inside_weights.append(frcn_blobs['bbox_inside_weights'])
-        bbox_outside_weights.append(frcn_blobs['bbox_outside_weights'])
-        max_overlap_with_gt.append(frcn_blobs['max_overlap'])
-
-    return (
-        rois,
-        labels_int32,
-        bbox_targets,
-        bbox_inside_weights,
-        bbox_outside_weights,
-        max_overlap_with_gt,
-        lod,
-    )
-
-
-def filter_roi(rois, max_overlap):
-    ws = rois[:, 2] - rois[:, 0] + 1
-    hs = rois[:, 3] - rois[:, 1] + 1
-    keep = np.where((ws > 0) & (hs > 0) & (max_overlap < 1.0))[0]
-    if len(keep) > 0:
-        return rois[keep, :]
-    return np.zeros((1, 4)).astype('float32')
-
-
-def _sample_rois(
-    rpn_rois,
-    gt_classes,
-    is_crowd,
-    gt_boxes,
-    im_info,
-    batch_size_per_im,
-    fg_fraction,
-    fg_thresh,
-    bg_thresh_hi,
-    bg_thresh_lo,
-    bbox_reg_weights,
-    class_nums,
-    use_random,
-    is_cls_agnostic,
-    is_cascade_rcnn,
-    max_overlap,
-):
-    rois_per_image = int(batch_size_per_im)
-    fg_rois_per_im = int(np.round(fg_fraction * rois_per_image))
-
-    # Roidb
-    im_scale = im_info[2]
-    inv_im_scale = 1.0 / im_scale
-    rpn_rois = rpn_rois * inv_im_scale
-
-    if is_cascade_rcnn:
-        rpn_rois = filter_roi(rpn_rois, max_overlap)
-
-    boxes = np.vstack([gt_boxes, rpn_rois])
-
-    gt_overlaps = np.zeros((boxes.shape[0], class_nums))
-    box_to_gt_ind_map = np.zeros((boxes.shape[0]), dtype=np.int32)
-    proposal_to_gt_overlaps = _bbox_overlaps(boxes, gt_boxes)
-
-    overlaps_argmax = proposal_to_gt_overlaps.argmax(axis=1)
-    overlaps_max = proposal_to_gt_overlaps.max(axis=1)
-    # Boxes which with non-zero overlap with gt boxes
-    overlapped_boxes_ind = np.where(overlaps_max > 0)[0]
-    overlapped_boxes_gt_classes = gt_classes[
-        overlaps_argmax[overlapped_boxes_ind]
-    ]
-    gt_overlaps[
-        overlapped_boxes_ind, overlapped_boxes_gt_classes
-    ] = overlaps_max[overlapped_boxes_ind]
-    box_to_gt_ind_map[overlapped_boxes_ind] = overlaps_argmax[
-        overlapped_boxes_ind
-    ]
-
-    crowd_ind = np.where(is_crowd)[0]
-    gt_overlaps[crowd_ind] = -1.0
-    max_overlaps = gt_overlaps.max(axis=1)
-    max_classes = gt_overlaps.argmax(axis=1)
-
-    if is_cascade_rcnn:
-        # Cascade RCNN Decode Filter
-        fg_inds = np.where(max_overlaps >= fg_thresh)[0]
-        bg_inds = np.where(
-            (max_overlaps < bg_thresh_hi) & (max_overlaps >= bg_thresh_lo)
-        )[0]
-        fg_rois_per_this_image = fg_inds.shape[0]
-        bg_rois_per_this_image = bg_inds.shape[0]
-    else:
-        # Foreground
-        fg_inds = np.where(max_overlaps >= fg_thresh)[0]
-        fg_rois_per_this_image = np.minimum(fg_rois_per_im, fg_inds.shape[0])
-        # Sample foreground if there are too many
-        if (fg_inds.shape[0] > fg_rois_per_this_image) and use_random:
-            fg_inds = np.random.choice(
-                fg_inds, size=fg_rois_per_this_image, replace=False
-            )
-        fg_inds = fg_inds[:fg_rois_per_this_image]
-        # Background
-        bg_inds = np.where(
-            (max_overlaps < bg_thresh_hi) & (max_overlaps >= bg_thresh_lo)
-        )[0]
-        bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
-        bg_rois_per_this_image = np.minimum(
-            bg_rois_per_this_image, bg_inds.shape[0]
-        )
-        # Sample background if there are too many
-        if (bg_inds.shape[0] > bg_rois_per_this_image) and use_random:
-            bg_inds = np.random.choice(
-                bg_inds, size=bg_rois_per_this_image, replace=False
-            )
-        bg_inds = bg_inds[:bg_rois_per_this_image]
-
-    keep_inds = np.append(fg_inds, bg_inds)
-    sampled_labels = max_classes[keep_inds]
-    sampled_labels[fg_rois_per_this_image:] = 0
-    sampled_boxes = boxes[keep_inds]
-    sampled_max_overlap = max_overlaps[keep_inds]
-    sampled_gts = gt_boxes[box_to_gt_ind_map[keep_inds]]
-    sampled_gts[fg_rois_per_this_image:, :] = gt_boxes[0]
-    bbox_label_targets = _compute_targets(
-        sampled_boxes, sampled_gts, sampled_labels, bbox_reg_weights
-    )
-    bbox_targets, bbox_inside_weights = _expand_bbox_targets(
-        bbox_label_targets, class_nums, is_cls_agnostic
-    )
-    bbox_outside_weights = np.array(
-        bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype
-    )
-    # Scale rois
-    sampled_rois = sampled_boxes * im_scale
-
-    # Faster RCNN blobs
-    frcn_blobs = {
-        'rois': sampled_rois,
-        'labels_int32': sampled_labels,
-        'bbox_targets': bbox_targets,
-        'bbox_inside_weights': bbox_inside_weights,
-        'bbox_outside_weights': bbox_outside_weights,
-        'max_overlap': sampled_max_overlap,
-    }
-    return frcn_blobs
-
-
-def _bbox_overlaps(roi_boxes, gt_boxes):
-    w1 = np.maximum(roi_boxes[:, 2] - roi_boxes[:, 0] + 1, 0)
-    h1 = np.maximum(roi_boxes[:, 3] - roi_boxes[:, 1] + 1, 0)
-    w2 = np.maximum(gt_boxes[:, 2] - gt_boxes[:, 0] + 1, 0)
-    h2 = np.maximum(gt_boxes[:, 3] - gt_boxes[:, 1] + 1, 0)
-    area1 = w1 * h1
-    area2 = w2 * h2
-
-    overlaps = np.zeros((roi_boxes.shape[0], gt_boxes.shape[0]))
-    for ind1 in range(roi_boxes.shape[0]):
-        for ind2 in range(gt_boxes.shape[0]):
-            inter_x1 = np.maximum(roi_boxes[ind1, 0], gt_boxes[ind2, 0])
-            inter_y1 = np.maximum(roi_boxes[ind1, 1], gt_boxes[ind2, 1])
-            inter_x2 = np.minimum(roi_boxes[ind1, 2], gt_boxes[ind2, 2])
-            inter_y2 = np.minimum(roi_boxes[ind1, 3], gt_boxes[ind2, 3])
-            inter_w = np.maximum(inter_x2 - inter_x1 + 1, 0)
-            inter_h = np.maximum(inter_y2 - inter_y1 + 1, 0)
-            inter_area = inter_w * inter_h
-            iou = inter_area / (area1[ind1] + area2[ind2] - inter_area)
-            overlaps[ind1, ind2] = iou
-    return overlaps
-
-
-def _compute_targets(roi_boxes, gt_boxes, labels, bbox_reg_weights):
-    assert roi_boxes.shape[0] == gt_boxes.shape[0]
-    assert roi_boxes.shape[1] == 4
-    assert gt_boxes.shape[1] == 4
-
-    targets = np.zeros(roi_boxes.shape)
-    bbox_reg_weights = np.asarray(bbox_reg_weights)
-    targets = _box_to_delta(
-        ex_boxes=roi_boxes, gt_boxes=gt_boxes, weights=bbox_reg_weights
-    )
-
-    return np.hstack([labels[:, np.newaxis], targets]).astype(
-        np.float32, copy=False
-    )
-
-
-def _box_to_delta(ex_boxes, gt_boxes, weights):
-    ex_w = ex_boxes[:, 2] - ex_boxes[:, 0] + 1
-    ex_h = ex_boxes[:, 3] - ex_boxes[:, 1] + 1
-    ex_ctr_x = ex_boxes[:, 0] + 0.5 * ex_w
-    ex_ctr_y = ex_boxes[:, 1] + 0.5 * ex_h
-
-    gt_w = gt_boxes[:, 2] - gt_boxes[:, 0] + 1
-    gt_h = gt_boxes[:, 3] - gt_boxes[:, 1] + 1
-    gt_ctr_x = gt_boxes[:, 0] + 0.5 * gt_w
-    gt_ctr_y = gt_boxes[:, 1] + 0.5 * gt_h
-
-    dx = (gt_ctr_x - ex_ctr_x) / ex_w / weights[0]
-    dy = (gt_ctr_y - ex_ctr_y) / ex_h / weights[1]
-    dw = (np.log(gt_w / ex_w)) / weights[2]
-    dh = (np.log(gt_h / ex_h)) / weights[3]
-
-    targets = np.vstack([dx, dy, dw, dh]).transpose()
-    return targets
-
-
-def _expand_bbox_targets(bbox_targets_input, class_nums, is_cls_agnostic):
-    class_labels = bbox_targets_input[:, 0]
-    fg_inds = np.where(class_labels > 0)[0]
-    # if is_cls_agnostic:
-    #     class_labels = [1 if ll > 0 else 0 for ll in class_labels]
-    #     class_labels = np.array(class_labels, dtype=np.int32)
-    #     class_nums = 2
-    bbox_targets = np.zeros(
-        (
-            class_labels.shape[0],
-            4 * class_nums if not is_cls_agnostic else 4 * 2,
-        )
-    )
-    bbox_inside_weights = np.zeros(bbox_targets.shape)
-    for ind in fg_inds:
-        class_label = int(class_labels[ind]) if not is_cls_agnostic else 1
-        start_ind = class_label * 4
-        end_ind = class_label * 4 + 4
-        bbox_targets[ind, start_ind:end_ind] = bbox_targets_input[ind, 1:]
-        bbox_inside_weights[ind, start_ind:end_ind] = (1.0, 1.0, 1.0, 1.0)
-    return bbox_targets, bbox_inside_weights
-
-
-class TestGenerateProposalLabelsOp(OpTest):
-    def set_data(self):
-        # self.use_random = False
-        self.init_use_random()
-        self.init_test_params()
-        self.init_test_input()
-        self.init_test_cascade()
-        self.init_test_output()
-
-        self.inputs = {
-            'RpnRois': (self.rpn_rois[0], self.rpn_rois_lod),
-            'GtClasses': (self.gt_classes[0], self.gts_lod),
-            'IsCrowd': (self.is_crowd[0], self.gts_lod),
-            'GtBoxes': (self.gt_boxes[0], self.gts_lod),
-            'ImInfo': self.im_info,
-        }
-        if self.max_overlaps is not None:
-            self.inputs['MaxOverlap'] = (
-                self.max_overlaps[0],
-                self.rpn_rois_lod,
-            )
-
-        self.attrs = {
-            'batch_size_per_im': self.batch_size_per_im,
-            'fg_fraction': self.fg_fraction,
-            'fg_thresh': self.fg_thresh,
-            'bg_thresh_hi': self.bg_thresh_hi,
-            'bg_thresh_lo': self.bg_thresh_lo,
-            'bbox_reg_weights': self.bbox_reg_weights,
-            'class_nums': self.class_nums,
-            'use_random': self.use_random,
-            'is_cls_agnostic': self.is_cls_agnostic,
-            'is_cascade_rcnn': self.is_cascade_rcnn,
-        }
-        self.outputs = {
-            'Rois': (self.rois, [self.lod]),
-            'LabelsInt32': (self.labels_int32, [self.lod]),
-            'BboxTargets': (self.bbox_targets, [self.lod]),
-            'BboxInsideWeights': (self.bbox_inside_weights, [self.lod]),
-            'BboxOutsideWeights': (self.bbox_outside_weights, [self.lod]),
-            'MaxOverlapWithGT': (self.max_overlap_with_gt, [self.lod]),
-        }
-
-    def test_check_output(self):
-        # NODE(yjjiang11): This op will be deprecated.
-        self.check_output(check_dygraph=False)
-
-    def setUp(self):
-        self.op_type = 'generate_proposal_labels'
-        self.set_data()
-
-    def init_test_cascade(
-        self,
-    ):
-        self.is_cascade_rcnn = False
-        self.max_overlaps = None
-
-    def init_use_random(self):
-        self.use_random = False
-
-    def init_test_params(self):
-        self.batch_size_per_im = 100
-        self.fg_fraction = 0.25
-        self.fg_thresh = 0.5
-        self.bg_thresh_hi = 0.5
-        self.bg_thresh_lo = 0.0
-        self.bbox_reg_weights = [0.1, 0.1, 0.2, 0.2]
-        self.is_cls_agnostic = False
-        self.class_nums = 2 if self.is_cls_agnostic else 81
-
-    def init_test_input(self):
-        np.random.seed(0)
-        gt_nums = 6  # Keep same with batch_size_per_im for unittest
-        proposal_nums = 200
-        images_shape = [[64, 64]]
-        self.im_info = np.ones((len(images_shape), 3)).astype(np.float32)
-        for i in range(len(images_shape)):
-            self.im_info[i, 0] = images_shape[i][0]
-            self.im_info[i, 1] = images_shape[i][1]
-            self.im_info[i, 2] = 0.8  # scale
-
-        self.rpn_rois, self.rpn_rois_lod = _generate_proposals(
-            images_shape, proposal_nums
-        )
-        ground_truth, self.gts_lod = _generate_groundtruth(
-            images_shape, self.class_nums, gt_nums
-        )
-
-        self.gt_classes = [gt['gt_classes'] for gt in ground_truth]
-        self.gt_boxes = [gt['boxes'] for gt in ground_truth]
-        self.is_crowd = [gt['is_crowd'] for gt in ground_truth]
-
-    def init_test_output(self):
-        (
-            self.rois,
-            self.labels_int32,
-            self.bbox_targets,
-            self.bbox_inside_weights,
-            self.bbox_outside_weights,
-            self.max_overlap_with_gt,
-            self.lod,
-        ) = generate_proposal_labels_in_python(
-            self.rpn_rois,
-            self.gt_classes,
-            self.is_crowd,
-            self.gt_boxes,
-            self.im_info,
-            self.batch_size_per_im,
-            self.fg_fraction,
-            self.fg_thresh,
-            self.bg_thresh_hi,
-            self.bg_thresh_lo,
-            self.bbox_reg_weights,
-            self.class_nums,
-            self.use_random,
-            self.is_cls_agnostic,
-            self.is_cascade_rcnn,
-            self.max_overlaps,
-        )
-        self.rois = np.vstack(self.rois)
-        self.labels_int32 = np.hstack(self.labels_int32)
-        self.labels_int32 = self.labels_int32[:, np.newaxis]
-        self.bbox_targets = np.vstack(self.bbox_targets)
-        self.bbox_inside_weights = np.vstack(self.bbox_inside_weights)
-        self.bbox_outside_weights = np.vstack(self.bbox_outside_weights)
-        self.max_overlap_with_gt = np.concatenate(self.max_overlap_with_gt)
-
-
-class TestCascade(TestGenerateProposalLabelsOp):
-    def init_test_cascade(self):
-        self.is_cascade_rcnn = True
-        roi_num = len(self.rpn_rois[0])
-        self.max_overlaps = []
-        max_overlap = np.random.rand(roi_num).astype('float32')
-        # Make GT samples with overlap = 1
-        max_overlap[max_overlap > 0.9] = 1.0
-        self.max_overlaps.append(max_overlap)
-
-
-class TestUseRandom(TestGenerateProposalLabelsOp):
-    def init_use_random(self):
-        self.use_random = True
-        self.is_cascade_rcnn = False
-
-    def test_check_output(self):
-        self.check_output_customized(self.verify_out)
-
-    def verify_out(self, outs):
-        print("skip")
-
-    def init_test_params(self):
-        self.batch_size_per_im = 512
-        self.fg_fraction = 0.025
-        self.fg_thresh = 0.5
-        self.bg_thresh_hi = 0.5
-        self.bg_thresh_lo = 0.0
-        self.bbox_reg_weights = [0.1, 0.1, 0.2, 0.2]
-        self.is_cls_agnostic = False
-        self.class_nums = 2 if self.is_cls_agnostic else 81
-
-
-class TestClsAgnostic(TestCascade):
-    def init_test_params(self):
-        self.batch_size_per_im = 512
-        self.fg_fraction = 0.25
-        self.fg_thresh = 0.5
-        self.bg_thresh_hi = 0.5
-        self.bg_thresh_lo = 0.0
-        self.bbox_reg_weights = [0.1, 0.1, 0.2, 0.2]
-        self.is_cls_agnostic = True
-        self.class_nums = 2 if self.is_cls_agnostic else 81
-
-
-class TestOnlyGT(TestCascade):
-    def init_test_input(self):
-        np.random.seed(0)
-        gt_nums = 6  # Keep same with batch_size_per_im for unittest
-        proposal_nums = 6
-        images_shape = [[64, 64]]
-        self.im_info = np.ones((len(images_shape), 3)).astype(np.float32)
-        for i in range(len(images_shape)):
-            self.im_info[i, 0] = images_shape[i][0]
-            self.im_info[i, 1] = images_shape[i][1]
-            self.im_info[i, 2] = 0.8  # scale
-
-        ground_truth, self.gts_lod = _generate_groundtruth(
-            images_shape, self.class_nums, gt_nums
-        )
-
-        self.gt_classes = [gt['gt_classes'] for gt in ground_truth]
-        self.gt_boxes = [gt['boxes'] for gt in ground_truth]
-        self.is_crowd = [gt['is_crowd'] for gt in ground_truth]
-        self.rpn_rois = self.gt_boxes
-        self.rpn_rois_lod = self.gts_lod
-
-
-class TestOnlyGT2(TestCascade):
-    def init_test_cascade(self):
-        self.is_cascade_rcnn = True
-        roi_num = len(self.rpn_rois[0])
-        self.max_overlaps = []
-        max_overlap = np.ones(roi_num).astype('float32')
-        self.max_overlaps.append(max_overlap)
-
-
-def _generate_proposals(images_shape, proposal_nums):
-    rpn_rois = []
-    rpn_rois_lod = []
-    num_proposals = 0
-    for i, image_shape in enumerate(images_shape):
-        proposals = _generate_boxes(image_shape, proposal_nums)
-        rpn_rois.append(proposals)
-        num_proposals = len(proposals)
-        rpn_rois_lod.append(num_proposals)
-    return rpn_rois, [rpn_rois_lod]
-
-
-def _generate_groundtruth(images_shape, class_nums, gt_nums):
-    ground_truth = []
-    gts_lod = []
-    num_gts = 0
-    for i, image_shape in enumerate(images_shape):
-        # Avoid background
-        gt_classes = np.random.randint(
-            low=1, high=class_nums, size=gt_nums
-        ).astype(np.int32)
-        gt_boxes = _generate_boxes(image_shape, gt_nums)
-        is_crowd = np.zeros((gt_nums), dtype=np.int32)
-        is_crowd[0] = 1
-        ground_truth.append(
-            {'gt_classes': gt_classes, 'boxes': gt_boxes, 'is_crowd': is_crowd}
-        )
-        num_gts += len(gt_classes)
-        gts_lod.append(num_gts)
-    return ground_truth, [gts_lod]
-
-
-def _generate_boxes(image_size, box_nums):
-    width = image_size[0]
-    height = image_size[1]
-    xywh = np.random.rand(box_nums, 4)
-    xy1 = xywh[:, [0, 1]] * image_size
-    wh = xywh[:, [2, 3]] * (image_size - xy1)
-    xy2 = xy1 + wh
-    boxes = np.hstack([xy1, xy2])
-    boxes[:, [0, 2]] = np.minimum(
-        width - 1.0, np.maximum(0.0, boxes[:, [0, 2]])
-    )
-    boxes[:, [1, 3]] = np.minimum(
-        height - 1.0, np.maximum(0.0, boxes[:, [1, 3]])
-    )
-    return boxes.astype(np.float32)
-
-
-if __name__ == '__main__':
-    unittest.main()