4343 globals ()[_OP ] = generate_layer_fn (_OP )
4444
4545
46- def detection_output (scores ,
47- loc ,
46+ def detection_output (loc ,
47+ scores ,
4848 prior_box ,
4949 prior_box_var ,
5050 background_label = 0 ,
@@ -61,14 +61,14 @@ def detection_output(scores,
6161 be zero if there is no valid bounding box.
6262
6363 Args:
64- scores(Variable): A 3-D Tensor with shape [N, C, M] represents the
65- predicted confidence predictions. N is the batch size, C is the
66- class number, M is number of bounding boxes. For each category
67- there are total M scores which corresponding M bounding boxes.
6864 loc(Variable): A 3-D Tensor with shape [N, M, 4] represents the
6965 predicted locations of M bounding boxes. N is the batch size,
7066 and each bounding box has four coordinate values and the layout
7167 is [xmin, ymin, xmax, ymax].
68+ scores(Variable): A 3-D Tensor with shape [N, M, C] that holds the
69+ predicted confidence scores. N is the batch size, C is the
70+ number of classes, and M is the number of bounding boxes. For each category
71+ there are M scores in total, which correspond to the M bounding boxes.
7272 prior_box(Variable): A 2-D Tensor with shape [M, 4] holds M boxes,
7373 each box is represented as [xmin, ymin, xmax, ymax],
7474 [xmin, ymin] is the left top coordinate of the anchor box,
@@ -100,7 +100,7 @@ class number, M is number of bounding boxes. For each category
100100 append_batch_size=False, dtype='float32')
101101 pbv = layers.data(name='prior_box_var', shape=[10, 4],
102102 append_batch_size=False, dtype='float32')
103- loc = layers.data(name='target_box', shape=[21, 4],
103+ loc = layers.data(name='target_box', shape=[2, 21, 4],
104104 append_batch_size=False, dtype='float32')
105105 scores = layers.data(name='scores', shape=[2, 21, 10],
106106 append_batch_size=False, dtype='float32')
@@ -109,7 +109,6 @@ class number, M is number of bounding boxes. For each category
109109 prior_box=pb,
110110 prior_box_var=pbv)
111111 """
112-
113112 helper = LayerHelper ("detection_output" , ** locals ())
114113 decoded_box = box_coder (
115114 prior_box = prior_box ,
@@ -118,6 +117,7 @@ class number, M is number of bounding boxes. For each category
118117 code_type = 'decode_center_size' )
119118
120119 nmsed_outs = helper .create_tmp_variable (dtype = decoded_box .dtype )
120+ scores = nn .transpose (scores , perm = [0 , 2 , 1 ])
121121 helper .append_op (
122122 type = "multiclass_nms" ,
123123 inputs = {'Scores' : scores ,
@@ -595,12 +595,13 @@ def multi_box_head(inputs,
595595 name(str): Name of the prior box layer. Default: None.
596596
597597 Returns:
598- mbox_loc(list): The predicted boxes' location of the inputs.
599- The layout of each element is [N, H, W, Priors]. Priors
600- is the number of predicted boxof each position of each input.
601- mbox_conf(list): The predicted boxes' confidence of the inputs.
602- The layout of each element is [N, H, W, Priors]. Priors
603- is the number of predicted box of each position of each input.
598+ mbox_loc(Variable): The predicted boxes' location of the inputs.
599+ The layout is [N, H*W*Priors, 4], where Priors
600+ is the number of predicted boxes at each position of each input.
601+ mbox_conf(Variable): The predicted boxes' confidence of the inputs.
602+ The layout is [N, H*W*Priors, C], where Priors
603+ is the number of predicted boxes at each position of each input
604+ and C is the number of classes.
604605 boxes(Variable): the output prior boxes of PriorBox.
605606 The layout is [num_priors, 4]. num_priors is the total
606607 box count of each position of inputs.
@@ -751,7 +752,7 @@ def _is_list_or_tuple_and_equal(data, length, err_info):
751752 num_boxes = box .shape [2 ]
752753
753754 # get box_loc
754- num_loc_output = num_boxes * num_classes * 4
755+ num_loc_output = num_boxes * 4
755756 mbox_loc = nn .conv2d (
756757 input = input ,
757758 num_filters = num_loc_output ,
@@ -760,7 +761,12 @@ def _is_list_or_tuple_and_equal(data, length, err_info):
760761 stride = stride )
761762
762763 mbox_loc = nn .transpose (mbox_loc , perm = [0 , 2 , 3 , 1 ])
763- mbox_locs .append (mbox_loc )
764+ new_shape = [
765+ mbox_loc .shape [0 ],
766+ mbox_loc .shape [1 ] * mbox_loc .shape [2 ] * mbox_loc .shape [3 ] / 4 , 4
767+ ]
768+ mbox_loc_flatten = ops .reshape (mbox_loc , shape = new_shape )
769+ mbox_locs .append (mbox_loc_flatten )
764770
765771 # get conf_loc
766772 num_conf_output = num_boxes * num_classes
@@ -771,11 +777,18 @@ def _is_list_or_tuple_and_equal(data, length, err_info):
771777 padding = pad ,
772778 stride = stride )
773779 conf_loc = nn .transpose (conf_loc , perm = [0 , 2 , 3 , 1 ])
774- mbox_confs .append (conf_loc )
780+ new_shape = [
781+ conf_loc .shape [0 ], conf_loc .shape [1 ] * conf_loc .shape [2 ] *
782+ conf_loc .shape [3 ] / num_classes , num_classes
783+ ]
784+ conf_loc_flatten = ops .reshape (conf_loc , shape = new_shape )
785+ mbox_confs .append (conf_loc_flatten )
775786
776787 if len (box_results ) == 1 :
777788 box = box_results [0 ]
778789 var = var_results [0 ]
790+ mbox_locs_concat = mbox_locs [0 ]
791+ mbox_confs_concat = mbox_confs [0 ]
779792 else :
780793 reshaped_boxes = []
781794 reshaped_vars = []
@@ -785,5 +798,7 @@ def _is_list_or_tuple_and_equal(data, length, err_info):
785798
786799 box = tensor .concat (reshaped_boxes )
787800 var = tensor .concat (reshaped_vars )
801+ mbox_locs_concat = tensor .concat (mbox_locs , axis = 1 )
802+ mbox_confs_concat = tensor .concat (mbox_confs , axis = 1 )
788803
789- return mbox_locs , mbox_confs , box , var
804+ return mbox_locs_concat , mbox_confs_concat , box , var
0 commit comments