Skip to content

Commit f068e08

Browse files
authored
add roi_align (#35102)
* add roi_align in vision/ops.py
1 parent 6b587e9 commit f068e08

2 files changed

Lines changed: 267 additions & 0 deletions

File tree

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import unittest
16+
import numpy as np
17+
18+
import paddle
19+
from paddle.vision.ops import roi_align, RoIAlign
20+
21+
22+
class TestRoIAlign(unittest.TestCase):
    """Shape and value checks for ``roi_align`` and the ``RoIAlign`` layer."""

    def setUp(self):
        # A single feature map: 256 channels on a 32x32 grid.
        self.data = np.random.rand(1, 256, 32, 32).astype('float32')
        # Three boxes in [x1, y1, x2, y2] form. The offsets guarantee
        # x2 - x1 >= 3 and y2 - y1 >= 4, so every box is non-degenerate.
        boxes = np.random.rand(3, 4)
        boxes[:, 2] += boxes[:, 0] + 3
        boxes[:, 3] += boxes[:, 1] + 4
        self.boxes = boxes.astype('float32')
        # All three boxes belong to the only image in the batch.
        self.boxes_num = np.array([3], dtype=np.int32)

    def roi_align_functional(self, output_size):
        """Run ``roi_align`` in the active mode and check the output shape.

        Args:
            output_size (int or tuple[int, int]): pooled size forwarded to
                ``roi_align``; an int means a square output.
        """
        if isinstance(output_size, int):
            pooled_h = pooled_w = output_size
        else:
            pooled_h, pooled_w = output_size
        # Derive the expected shape from the fixture instead of repeating
        # the box count and channel count as literals.
        output_shape = (self.boxes.shape[0], self.data.shape[1], pooled_h,
                        pooled_w)

        if paddle.in_dynamic_mode():
            data = paddle.to_tensor(self.data)
            boxes = paddle.to_tensor(self.boxes)
            boxes_num = paddle.to_tensor(self.boxes_num)

            align_out = roi_align(
                data, boxes, boxes_num=boxes_num, output_size=output_size)
            np.testing.assert_equal(align_out.shape, output_shape)
            return

        # Build the graph in a private program so repeated calls do not keep
        # appending variables and ops to the global default program.
        main_program = paddle.static.Program()
        startup_program = paddle.static.Program()
        with paddle.static.program_guard(main_program, startup_program):
            data = paddle.static.data(
                shape=self.data.shape, dtype=self.data.dtype, name='data')
            boxes = paddle.static.data(
                shape=self.boxes.shape, dtype=self.boxes.dtype, name='boxes')
            boxes_num = paddle.static.data(
                shape=self.boxes_num.shape,
                dtype=self.boxes_num.dtype,
                name='boxes_num')

            align_out = roi_align(
                data, boxes, boxes_num=boxes_num, output_size=output_size)

        exe = paddle.static.Executor(paddle.CPUPlace())
        fetched = exe.run(main_program,
                          feed={
                              'data': self.data,
                              'boxes': self.boxes,
                              'boxes_num': self.boxes_num
                          },
                          fetch_list=[align_out])

        np.testing.assert_equal(fetched[0].shape, output_shape)

    def test_roi_align_functional_dynamic(self):
        self.roi_align_functional(3)
        self.roi_align_functional(output_size=(3, 4))

    def test_roi_align_functional_static(self):
        paddle.enable_static()
        try:
            self.roi_align_functional(3)
        finally:
            # Always switch back, otherwise a failure here would leave static
            # mode enabled for every test that runs afterwards.
            paddle.disable_static()

    def test_RoIAlign(self):
        roi_align_c = RoIAlign(output_size=(4, 3))
        data = paddle.to_tensor(self.data)
        boxes = paddle.to_tensor(self.boxes)
        boxes_num = paddle.to_tensor(self.boxes_num)

        align_out = roi_align_c(data, boxes, boxes_num)
        np.testing.assert_equal(align_out.shape, (3, 256, 4, 3))

    def test_value(self):
        # 4x4 single-channel map holding the values 1..16 in row-major order.
        data = np.arange(1, 17).reshape(1, 1, 4, 4).astype(np.float32)
        boxes = np.array(
            [[1., 1., 2., 2.], [1.5, 1.5, 3., 3.]]).astype(np.float32)
        boxes_num = np.array([2]).astype(np.int32)
        # Expected 1x1 pooled value for each of the two boxes.
        output = np.array([[[[6.]]], [[[9.75]]]], dtype=np.float32)

        data = paddle.to_tensor(data)
        boxes = paddle.to_tensor(boxes)
        boxes_num = paddle.to_tensor(boxes_num)

        roi_align_c = RoIAlign(output_size=1)
        align_out = roi_align_c(data, boxes, boxes_num)
        np.testing.assert_almost_equal(align_out.numpy(), output)
105+
106+
107+
# Run the test cases above when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()

python/paddle/vision/ops.py

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@
3434
'RoIPool',
3535
'psroi_pool',
3636
'PSRoIPool',
37+
'roi_align',
38+
'RoIAlign',
3739
]
3840

3941

@@ -1138,3 +1140,160 @@ def forward(self, x, boxes, boxes_num):
11381140
def extra_repr(self):
    """Return the configuration summary used in the layer's repr."""
    # The placeholders are filled from the instance dict, so they must match
    # the private attribute names stored on the layer.
    template = 'output_size={_output_size}, spatial_scale={_spatial_scale}'
    return template.format(**self.__dict__)
1143+
1144+
1145+
def roi_align(x,
              boxes,
              boxes_num,
              output_size,
              spatial_scale=1.0,
              sampling_ratio=-1,
              aligned=True,
              name=None):
    """
    This operator implements the roi_align layer.
    Region of Interest (RoI) Align performs bilinear interpolation on inputs
    of nonuniform sizes to obtain fixed-size feature maps (e.g. 7*7), as
    described in Mask R-CNN.

    Each region proposal is divided into ``pooled_height x pooled_width``
    equal-sized bins. The box coordinates are used as given, without being
    rounded to the feature-map grid.

    In each bin, the values at regularly sampled locations (see
    ``sampling_ratio``) are computed directly through bilinear interpolation
    and the output of the bin is their mean, which avoids the misalignment
    problem.

    Args:
        x (Tensor): Input feature, 4D-Tensor with the shape of [N,C,H,W],
            where N is the batch size, C is the input channel, H is height,
            W is width. The data type is float32 or float64.
        boxes (Tensor): Boxes (RoIs, Regions of Interest) to pool over. It
            should be a 2-D Tensor of shape (num_boxes, 4). The data type is
            float32 or float64. Given as [[x1, y1, x2, y2], ...], (x1, y1) is
            the top left coordinates, and (x2, y2) is the bottom right
            coordinates.
        boxes_num (Tensor): The number of boxes contained in each picture in
            the batch, the data type is int32. Must not be None in dygraph
            mode.
        output_size (int or Tuple[int, int]): The pooled output size (h, w),
            data type is int32. If int, h and w are both equal to
            output_size.
        spatial_scale (float32): Multiplicative spatial scale factor to
            translate ROI coords from their input scale to the scale used
            when pooling. Default: 1.0
        sampling_ratio (int32): Number of sampling points in the
            interpolation grid used to compute the output value of each
            pooled output bin. If > 0, then exactly
            ``sampling_ratio x sampling_ratio`` sampling points per bin are
            used. If <= 0, then an adaptive number of grid points are used
            (computed as ``ceil(roi_width / output_width)``, and likewise
            for height). Default: -1
        aligned (bool): If False, use the legacy implementation. If True,
            shift the box coordinates by -0.5 pixel for a better alignment
            with the two neighboring pixel indices. This version is used in
            Detectron2. Default: True
        name (str, optional): For detailed information, please refer to
            :ref:`api_guide_Name`. Usually there is no need to set it and it
            is None by default.

    Returns:
        Tensor: The output of ROIAlignOp is a 4-D tensor with shape
        (num_boxes, channels, pooled_h, pooled_w). The data type is float32
        or float64.

    Examples:
        .. code-block:: python

            import paddle
            from paddle.vision.ops import roi_align

            data = paddle.rand([1, 256, 32, 32])
            boxes = paddle.rand([3, 4])
            boxes[:, 2] += boxes[:, 0] + 3
            boxes[:, 3] += boxes[:, 1] + 4
            boxes_num = paddle.to_tensor([3]).astype('int32')
            align_out = roi_align(data, boxes, boxes_num, output_size=3)
            assert align_out.shape == [3, 256, 3, 3]
    """

    check_type(output_size, 'output_size', (int, tuple), 'roi_align')
    if isinstance(output_size, int):
        output_size = (output_size, output_size)

    pooled_height, pooled_width = output_size
    if in_dygraph_mode():
        assert boxes_num is not None, "boxes_num should not be None in dygraph mode."
        return core.ops.roi_align(
            x, boxes, boxes_num, "pooled_height", pooled_height, "pooled_width",
            pooled_width, "spatial_scale", spatial_scale, "sampling_ratio",
            sampling_ratio, "aligned", aligned)

    check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'roi_align')
    check_variable_and_dtype(boxes, 'boxes', ['float32', 'float64'],
                             'roi_align')
    # No extra locals are introduced before this call so that the keyword
    # arguments forwarded through ``locals()`` stay exactly as before.
    helper = LayerHelper('roi_align', **locals())
    # Take the output dtype straight from ``x``. NOTE(review): the previous
    # ``helper.input_dtype()`` call used its default lookup name, which
    # appears to be ``input`` rather than ``x`` -- confirm against
    # LayerHelper.
    align_out = helper.create_variable_for_type_inference(x.dtype)
    inputs = {
        "X": x,
        "ROIs": boxes,
    }
    # ``RoisNum`` is only wired in when the caller supplied it.
    if boxes_num is not None:
        inputs['RoisNum'] = boxes_num
    helper.append_op(
        type="roi_align",
        inputs=inputs,
        outputs={"Out": align_out},
        attrs={
            "pooled_height": pooled_height,
            "pooled_width": pooled_width,
            "spatial_scale": spatial_scale,
            "sampling_ratio": sampling_ratio,
            "aligned": aligned,
        })
    return align_out
1253+
1254+
1255+
class RoIAlign(Layer):
    """
    This interface is used to construct a callable object of the `RoIAlign`
    class. Please refer to :ref:`api_paddle_vision_ops_roi_align`.

    Args:
        output_size (int or tuple[int, int]): The pooled output size (h, w),
            data type is int32. If int, h and w are both equal to
            output_size.
        spatial_scale (float32, optional): Multiplicative spatial scale
            factor to translate ROI coords from their input scale to the
            scale used when pooling. Default: 1.0

    Returns:
        align_out (Tensor): The output of ROIAlign operator is a 4-D tensor
        with shape (num_boxes, channels, pooled_h, pooled_w).

    Examples:
        .. code-block:: python

            import paddle
            from paddle.vision.ops import RoIAlign

            data = paddle.rand([1, 256, 32, 32])
            boxes = paddle.rand([3, 4])
            boxes[:, 2] += boxes[:, 0] + 3
            boxes[:, 3] += boxes[:, 1] + 4
            boxes_num = paddle.to_tensor([3]).astype('int32')
            roi_align = RoIAlign(output_size=(4, 3))
            align_out = roi_align(data, boxes, boxes_num)
            assert align_out.shape == [3, 256, 4, 3]
    """

    def __init__(self, output_size, spatial_scale=1.0):
        super(RoIAlign, self).__init__()
        self._output_size = output_size
        self._spatial_scale = spatial_scale

    def forward(self, x, boxes, boxes_num, aligned=True):
        """Apply ``roi_align`` with the size and scale stored on this layer.

        Args:
            x (Tensor): Input feature map, forwarded to ``roi_align`` as ``x``.
            boxes (Tensor): Boxes to pool over, forwarded as ``boxes``.
            boxes_num (Tensor): Number of boxes per image, forwarded as
                ``boxes_num``.
            aligned (bool, optional): Forwarded as ``aligned``.
                Default: True

        Returns:
            Tensor: The result of ``roi_align``.
        """
        return roi_align(
            x=x,
            boxes=boxes,
            boxes_num=boxes_num,
            output_size=self._output_size,
            spatial_scale=self._spatial_scale,
            aligned=aligned)

    def extra_repr(self):
        # Same summary format as the neighbouring layer's ``extra_repr`` in
        # this file, so the layer's configuration is visible when printed.
        main_str = 'output_size={_output_size}, spatial_scale={_spatial_scale}'
        return main_str.format(**self.__dict__)

0 commit comments

Comments
 (0)