157 changes: 157 additions & 0 deletions paddle/gserver/layers/PriorBox.cpp
@@ -0,0 +1,157 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "Layer.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/BaseMatrix.h"

namespace paddle {

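/**
 * Generates the SSD prior (anchor) boxes: for every location of the input
 * feature map, the layer outputs a set of prior box coordinates followed by
 * their variances.
 */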
class PriorBoxLayer : public Layer {
Contributor: Please add comments.

Contributor Author: Added.

public:
  explicit PriorBoxLayer(const LayerConfig& config) : Layer(config) {}
  bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
  void forward(PassType passType);
  void backward(const UpdateCallback& callback) {}
  void forwardImp(const Argument& featureMap, const Argument& imageShape);

  int numPriors_;
  std::vector<int> minSize_;
  std::vector<int> maxSize_;
  std::vector<float> aspectRatio_;
  std::vector<float> variance_;
  std::vector<Argument> tmpCpuInput_;
  MatrixPtr buffer_;
};

bool PriorBoxLayer::init(const LayerMap& layerMap,
                         const ParameterMap& parameterMap) {
  Layer::init(layerMap, parameterMap);
  auto pb_conf = config_.inputs(0).priorbox_conf();
  std::copy(pb_conf.min_size().begin(),
            pb_conf.min_size().end(),
            std::back_inserter(minSize_));
  std::copy(pb_conf.max_size().begin(),
            pb_conf.max_size().end(),
            std::back_inserter(maxSize_));
  std::copy(pb_conf.aspect_ratio().begin(),
            pb_conf.aspect_ratio().end(),
            std::back_inserter(aspectRatio_));
  std::copy(pb_conf.variance().begin(),
            pb_conf.variance().end(),
            std::back_inserter(variance_));
Contributor: You could store config_.inputs(0).priorbox_conf() in a temporary variable first, so lines 38-49 don't have to repeat it so many times.

Contributor Author: OK.

  // flip: for each configured aspect ratio, also add its reciprocal,
  // then append the implicit ratio 1.
  int input_ratio_length = aspectRatio_.size();
  for (int index = 0; index < input_ratio_length; index++)
    aspectRatio_.push_back(1 / aspectRatio_[index]);
  aspectRatio_.push_back(1.);
  numPriors_ = aspectRatio_.size();
  if (maxSize_.size() > 0) numPriors_++;
  buffer_ = Matrix::create(1, 1, false, false);
Contributor: There is a resizeOrCreate below, so this line can be deleted.

Contributor Author: Deleted.

  if (useGpu_) {
    tmpCpuInput_.reserve(inputLayers_.size());
    for (size_t i = 0; i < inputLayers_.size(); i++) {
      tmpCpuInput_.push_back(Argument());
    }
  }
  return true;
}

void PriorBoxLayer::forward(PassType passType) {
  Layer::forward(passType);
  if (useGpu_) {
    for (size_t i = 0; i < inputLayers_.size(); i++) {
      tmpCpuInput_[i].resizeAndCopyFrom(
          getInput(i), false, HPPL_STREAM_DEFAULT);
Contributor (@qingqing01, Dec 14, 2016): The computation below only uses the input's value, so there is no need to copy the whole Argument here; copying the value is enough. std::vector<Argument> tmpCpuInput_ could therefore just be two Matrix objects.

Contributor Author (@Noplz, Dec 14, 2016): If we pass a Matrix, won't getFrameWidth and getFrameHeight become unavailable? We could fetch those values outside and pass in only the int width and height. In fact, this layer only needs the shape of the first input matrix, and the second input's matrix holds just two values; if both are read via getElement, it doesn't matter whether the data is on CPU or GPU, and the output is written via copyFrom anyway. So the if (useGpu_) here feels somewhat redundant too.

Contributor: If only the shape is needed, it can be obtained through the Argument or the Matrix regardless of whether the data is on CPU or GPU, so the input should not need to be copied at all.

Contributor Author: Right, I'll change it back to the original approach with no copy at all.

Contributor Author: Changed: no copy is needed anymore, and the code works on both CPU and GPU.

      hl_stream_synchronize(HPPL_STREAM_DEFAULT);
    }
    // both inputs are now staged on the CPU; run the implementation once.
    forwardImp(tmpCpuInput_[0], tmpCpuInput_[1]);
  } else {
    forwardImp(getInput(0), getInput(1));
  }
}
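The no-copy version that the thread converges on might look like this minimal sketch, assuming (as the discussion states) that shape metadata and single elements can be read the same way for CPU-resident and GPU-resident data; the actual revision may differ:

// Sketch only: forwardImp reads shape metadata and two scalar elements, so a
// single code path could serve CPU and GPU inputs without the useGpu_ branch
// or the tmpCpuInput_ staging shown above.
void PriorBoxLayer::forward(PassType passType) {
  Layer::forward(passType);
  forwardImp(getInput(0), getInput(1));
}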

void PriorBoxLayer::forwardImp(const Argument& featureMap,
                               const Argument& imageShape) {
  int layer_width = featureMap.getFrameWidth();
  int layer_height = featureMap.getFrameHeight();
Contributor: Many of the variable names do not follow the Paddle naming convention.

Contributor Author: OK... I hadn't noticed that before.

Contributor Author: Fixed; please take another look.


  MatrixPtr inV1 = imageShape.value;
  int image_width = inV1->getElement(0, 0);
  int image_height = inV1->getElement(0, 1);
Contributor: Does the second input store only the height and width? Are they the same for every sample? Could they be obtained from the input data_layer's frameHeight and frameWidth?

Contributor Author: Yes, they are the same for every sample. As I recall, frameHeight and frameWidth could not be fetched from the data_layer.

Contributor: If they are the same for every sample, the data_layer can now set the image height and width, and they can then be read via frameHeight and frameWidth.

Contributor Author: Then I'll give that a try.

Contributor Author: Changed to obtain the image height and width from the data_layer's output.
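Under that follow-up change, the scalar shape input becomes unnecessary; a minimal sketch of what the author describes, assuming the second input is the image data_layer with height and width set:

// Sketch only: read the image shape from the data_layer output's frame
// metadata instead of a separate [width, height] matrix (assumes the
// data_layer sets height and width, as the thread above describes).
int image_width = getInput(1).getFrameWidth();
int image_height = getInput(1).getFrameHeight();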

  float step_w = static_cast<float>(image_width) / layer_width;
  float step_h = static_cast<float>(image_height) / layer_height;
  int dim = layer_height * layer_width * numPriors_ * 4;
  reserveOutput(1, dim * 2);
  // use a cpu buffer to compute
  Matrix::resizeOrCreate(buffer_, 1, dim * 2, false, false);
  auto* tmp_ptr = buffer_->getData();

  int idx = 0;
  for (int h = 0; h < layer_height; ++h) {
    for (int w = 0; w < layer_width; ++w) {
      float center_x = (w + 0.5) * step_w;
      float center_y = (h + 0.5) * step_h;
      int min_size = 0;
      for (size_t s = 0; s < minSize_.size(); s++) {
        // first prior.
        min_size = minSize_[s];
        int box_width = min_size;
        int box_height = min_size;
        // xmin, ymin, xmax, ymax.
        tmp_ptr[idx++] = (center_x - box_width / 2.) / image_width;
        tmp_ptr[idx++] = (center_y - box_height / 2.) / image_height;
        tmp_ptr[idx++] = (center_x + box_width / 2.) / image_width;
        tmp_ptr[idx++] = (center_y + box_height / 2.) / image_height;

        if (maxSize_.size() > 0) {
          CHECK_EQ(minSize_.size(), maxSize_.size());
          // second prior.
          for (size_t s = 0; s < maxSize_.size(); s++) {
            int max_size = maxSize_[s];
            box_width = box_height = sqrt(min_size * max_size);
            tmp_ptr[idx++] = (center_x - box_width / 2.) / image_width;
            tmp_ptr[idx++] = (center_y - box_height / 2.) / image_height;
            tmp_ptr[idx++] = (center_x + box_width / 2.) / image_width;
            tmp_ptr[idx++] = (center_y + box_height / 2.) / image_height;
          }
        }
      }
      // rest of priors.
      for (size_t r = 0; r < aspectRatio_.size(); r++) {
        float ar = aspectRatio_[r];
        if (fabs(ar - 1.) < 1e-6) continue;
        float box_width = min_size * sqrt(ar);
        float box_height = min_size / sqrt(ar);
        tmp_ptr[idx++] = (center_x - box_width / 2.) / image_width;
        tmp_ptr[idx++] = (center_y - box_height / 2.) / image_height;
        tmp_ptr[idx++] = (center_x + box_width / 2.) / image_width;
        tmp_ptr[idx++] = (center_y + box_height / 2.) / image_height;
      }
    }
  }
  // clip the priors' coordinates such that they are within [0, 1].
  for (int d = 0; d < dim; ++d)
    tmp_ptr[d] = std::min(std::max(tmp_ptr[d], (float)0.), (float)1.);
  // set the variance.
  for (int h = 0; h < layer_height; h++)
    for (int w = 0; w < layer_width; w++)
      for (int i = 0; i < numPriors_; i++)
        for (int j = 0; j < 4; j++) tmp_ptr[idx++] = variance_[j];
  MatrixPtr outV = getOutputValue();
  outV->copyFrom(buffer_->data_, dim * 2);
}

REGISTER_LAYER(priorbox, PriorBoxLayer);

} // namespace paddle
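For intuition, the buffer sizes computed by forwardImp above can be sanity-checked with a few lines of arithmetic (example values assumed, not part of the PR):

# Sanity check of PriorBoxLayer's output sizes; all values here are assumed
# example settings.
layer_h, layer_w = 10, 10                 # feature map height/width
input_ratios = [2.0]                      # configured aspect_ratio before flipping
num_priors = 2 * len(input_ratios) + 1    # flipped ratios plus the implicit ratio 1
num_priors += 1                           # one extra prior when max_size is set
dim = layer_h * layer_w * num_priors * 4  # xmin, ymin, xmax, ymax per prior
assert dim == 1600
assert dim * 2 == 3200                    # coordinates followed by variances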
8 changes: 8 additions & 0 deletions proto/ModelConfig.proto
@@ -248,6 +248,13 @@ message ImageConfig {
  required uint32 img_size_y = 9;
}

message PriorBoxConfig {
  repeated uint32 min_size = 1;
  repeated uint32 max_size = 2;
  repeated float aspect_ratio = 3;
  repeated float variance = 4;
}

message LayerInputConfig {
  required string input_layer_name = 1;
  optional string input_parameter_name = 2;
@@ -263,6 +270,7 @@ message LayerInputConfig
  optional BilinearInterpConfig bilinear_interp_conf = 10;
  optional MaxOutConfig maxout_conf = 11;
  optional SppConfig spp_conf = 12;
  optional PriorBoxConfig priorbox_conf = 13;
}

message LayerConfig {
15 changes: 15 additions & 0 deletions python/paddle/trainer/config_parser.py
@@ -1578,6 +1578,21 @@ def __init__(self, name, inputs):
        super(PrintLayer, self).__init__(name, 'print', 0, inputs)


@config_layer('priorbox')
class PriorBoxLayer(LayerBase):
    def __init__(self, name, inputs, size, min_size, max_size, aspect_ratio,
                 variance):
        super(PriorBoxLayer, self).__init__(name, 'priorbox', 0, inputs)
        config_assert(len(inputs) == 2, 'PriorBoxLayer must have 2 inputs')
        self.config.inputs[0].priorbox_conf.min_size.extend(min_size)
        self.config.inputs[0].priorbox_conf.max_size.extend(max_size)
        self.config.inputs[0].priorbox_conf.aspect_ratio.extend(aspect_ratio)
        self.config.inputs[0].priorbox_conf.variance.extend(variance)
        self.config.size = size
        input_layer0 = self.get_input_layer(0)
        input_layer1 = self.get_input_layer(1)
Contributor: If input_layer0 and input_layer1 are unused, they can be deleted.

Contributor Author: Deleted.



@config_layer('data')
class DataLayer(LayerBase):
    def __init__(self, name, size, height=None, width=None, device=None):
48 changes: 48 additions & 0 deletions python/paddle/trainer_config_helpers/layers.py
@@ -106,6 +106,7 @@
    'maxout_layer',
    'out_prod_layer',
    'print_layer',
    'priorbox_layer',
    'spp_layer',
]

@@ -171,6 +172,7 @@ class LayerType(object):
    SPP_LAYER = "spp"

    PRINT_LAYER = "print"
    PRIORBOX_LAYER = "priorbox"

    CTC_LAYER = "ctc"
    WARP_CTC_LAYER = "warp_ctc"
@@ -934,6 +936,52 @@ def print_layer(input, name=None):
    # this layer doesn't return anything, so it cannot be the input of another layer.


@wrap_name_default("priorbox")
def priorbox_layer(input,
img_shape,
aspect_ratio,
variance,
min_size,
max_size=[],
name=None):
"""
Compute the priorbox and set the variance. This layer is necessary for ssd.

:param name: The Layer Name.
:type name: basestring
:param input: The input layer.
:type input: LayerOutput
:param img_shape: The width and height of the network input image.
:type img_shape: LayerOutput
:param aspect_ratio: The aspect ratio.
:type aspect_ratio: list
:param variance: The bounding box variance.
:type min_size: The min size of the priorbox width/height.
:param min_size: list
:type max_size: The max size of the priorbox width/height. Could be NULL.
:param max_size: list
:return: LayerOutput
"""
# plus one for ratio 1.
num_filters = (len(aspect_ratio) * 2 + 1 + len(max_size)) * 4
size = (input.size / input.num_filters) * num_filters * 2
Contributor (@qingqing01, Dec 15, 2016): You could add a check on the second input, i.e. the image: it must be a data_layer? In config_parser.py it should also be possible to check that its height and width have been set.

Contributor Author: Checks added; they are all done in config_parser.py.
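A hypothetical form of those checks in config_parser.py; the helper and field names below are assumed for illustration, not the exact committed code:

# Hypothetical sketch of the checks discussed above, placed in
# PriorBoxLayer.__init__; attribute names are assumed.
image_layer = self.get_input_layer(1)
config_assert(image_layer.type == 'data',
              'The second input of PriorBoxLayer must be a data layer.')
config_assert(image_layer.height > 0 and image_layer.width > 0,
              'The data layer input of PriorBoxLayer must set height and width.')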

    Layer(
        name=name,
        type=LayerType.PRIORBOX_LAYER,
        inputs=[input.name, img_shape.name],
        size=size,
        min_size=min_size,
        max_size=max_size,
        aspect_ratio=aspect_ratio,
        variance=variance)
    return LayerOutput(
        name,
        LayerType.PRIORBOX_LAYER,
        parents=[input, img_shape],
        num_filters=num_filters,
        size=size)
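For illustration, a hypothetical fragment wiring the layer into a network config; every size and parameter value below is assumed:

# Hypothetical usage sketch; sizes and parameter values are assumed.
image = data_layer(name='image', size=3 * 300 * 300, height=300, width=300)
conv = img_conv_layer(input=image, filter_size=3, num_channels=3,
                      num_filters=512, stride=1, padding=1)
priors = priorbox_layer(input=conv,
                        img_shape=image,
                        min_size=[30],
                        max_size=[60],
                        aspect_ratio=[2.],
                        variance=[0.1, 0.1, 0.2, 0.2])
# With one aspect ratio and one max_size:
#   num_filters = (1 * 2 + 1 + 1) * 4 = 16 values per feature-map location.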


@wrap_name_default("seq_pooling")
@wrap_bias_attr_default(has_bias=False)
@wrap_param_default(['pooling_type'], default_factory=lambda _: MaxPooling())