Skip to content

Commit 652d1bd

Browse files
ZHUIAnnaTrainingG
authored andcommitted
Add segment apis to paddle.incubate (PaddlePaddle#35759)
1 parent 1617139 commit 652d1bd

File tree

5 files changed

+325
-3
lines changed

5 files changed

+325
-3
lines changed

python/paddle/fluid/tests/unittests/test_segment_ops.py

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,11 @@
1515
from __future__ import print_function
1616

1717
import unittest
18-
import numpy as np
1918
import sys
19+
20+
import numpy as np
21+
import paddle
22+
2023
from op_test import OpTest
2124

2225

@@ -198,5 +201,62 @@ def prepare(self):
198201
self.attrs = {'pooltype': "MEAN"}
199202

200203

204+
class API_SegmentOpsTest(unittest.TestCase):
    """Exercise the ``paddle.incubate.segment_*`` APIs in both graph modes.

    Each test runs segment_sum / segment_mean / segment_max / segment_min on
    the same 3x3 float32 input with segment ids ``[0, 0, 1]`` and compares
    the outputs against hand-computed reference arrays.
    """

    # Rows 0 and 1 are assigned to segment 0, row 2 to segment 1.
    DATA = [[1, 2, 3], [3, 2, 1], [4, 5, 6]]
    SEGMENT_IDS = [0, 0, 1]

    @staticmethod
    def _expected_results():
        """Return the reference outputs in the order sum, mean, max, min."""
        return [
            np.array([[4, 4, 4], [4, 5, 6]], dtype="float32"),
            np.array([[2, 2, 2], [4, 5, 6]], dtype="float32"),
            np.array([[3, 2, 3], [4, 5, 6]], dtype="float32"),
            np.array([[1, 2, 1], [4, 5, 6]], dtype="float32"),
        ]

    def _check_results(self, actual):
        """Assert that every array in ``actual`` matches its reference.

        Args:
            actual: iterable of numpy arrays ordered as sum, mean, max, min.
        """
        for np_res, ret_res in zip(self._expected_results(), actual):
            # The message is kept on one logical string so no source
            # indentation leaks into the failure output.
            self.assertTrue(
                np.allclose(
                    np_res, ret_res, atol=1e-6),
                "two value is {}\n{}, check diff!".format(np_res, ret_res))

    def test_static(self):
        """Build the four ops in a fresh static program and run them on CPU."""
        with paddle.static.program_guard(paddle.static.Program()):
            x = paddle.static.data(name="x", shape=[3, 3], dtype="float32")
            y = paddle.static.data(name='y', shape=[3], dtype='int32')

            res_sum = paddle.incubate.segment_sum(x, y)
            res_mean = paddle.incubate.segment_mean(x, y)
            res_max = paddle.incubate.segment_max(x, y)
            res_min = paddle.incubate.segment_min(x, y)

            exe = paddle.static.Executor(paddle.CPUPlace())
            data1 = np.array(self.DATA, dtype='float32')
            data2 = np.array(self.SEGMENT_IDS, dtype="int32")

            ret = exe.run(feed={'x': data1,
                                'y': data2},
                          fetch_list=[res_sum, res_mean, res_max, res_min])

            self._check_results(ret)

    def test_dygraph(self):
        """Call the four ops eagerly on CPU tensors."""
        device = paddle.CPUPlace()
        with paddle.fluid.dygraph.guard(device):
            x = paddle.to_tensor(self.DATA, dtype='float32')
            y = paddle.to_tensor(self.SEGMENT_IDS, dtype="int32")
            res_sum = paddle.incubate.segment_sum(x, y)
            res_mean = paddle.incubate.segment_mean(x, y)
            res_max = paddle.incubate.segment_max(x, y)
            res_min = paddle.incubate.segment_min(x, y)

            ret = [res_sum, res_mean, res_max, res_min]

            # Convert to numpy once so the comparison and the failure
            # message both show the same values.
            self._check_results([ret_res.numpy() for ret_res in ret])
259+
260+
201261
if __name__ == '__main__':
202262
unittest.main()

python/paddle/incubate/__init__.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,18 @@
1818
from ..fluid.layer_helper import LayerHelper # noqa: F401
1919
from .operators import softmax_mask_fuse_upper_triangle # noqa: F401
2020
from .operators import softmax_mask_fuse # noqa: F401
21+
from .tensor import segment_sum
22+
from .tensor import segment_mean
23+
from .tensor import segment_max
24+
from .tensor import segment_min
2125

22-
__all__ = [ # noqa
23-
'LookAhead', 'ModelAverage', 'softmax_mask_fuse_upper_triangle', 'softmax_mask_fuse'
26+
__all__ = [
27+
'LookAhead',
28+
'ModelAverage',
29+
'softmax_mask_fuse_upper_triangle',
30+
'softmax_mask_fuse',
31+
'segment_sum',
32+
'segment_mean',
33+
'segment_max',
34+
'segment_min',
2435
]
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from .math import segment_sum
16+
from .math import segment_mean
17+
from .math import segment_max
18+
from .math import segment_min
19+
20+
__all__ = [
21+
'segment_sum',
22+
'segment_mean',
23+
'segment_max',
24+
'segment_min',
25+
]
Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
__all__ = [
16+
'segment_sum',
17+
'segment_mean',
18+
'segment_max',
19+
'segment_min',
20+
]
21+
22+
import paddle
23+
24+
from paddle.fluid.layer_helper import LayerHelper, in_dygraph_mode
25+
from paddle.fluid.data_feeder import check_variable_and_dtype
26+
from paddle import _C_ops
27+
28+
29+
def segment_sum(data, segment_ids, name=None):
    """
    Segment Sum Operator.

    This operator sums the elements of input `data` that share the same
    index in `segment_ids`.
    It computes a tensor such that $out_i = \\sum_{j} data_{j}$
    where sum is over j such that `segment_ids[j] == i`.

    Args:
        data (Tensor): A tensor, available data type float32, float64.
        segment_ids (Tensor): A 1-D tensor, which has the same size
            as the first dimension of input data.
            Available data type is int32, int64.
        name (str, optional): Forwarded to ``LayerHelper`` in static-graph
            mode; not used in dygraph mode. Default: None.

    Returns:
        output (Tensor): the reduced result.

    Examples:

        .. code-block:: python

            import paddle
            data = paddle.to_tensor([[1, 2, 3], [3, 2, 1], [4, 5, 6]], dtype='float32')
            segment_ids = paddle.to_tensor([0, 0, 1], dtype='int32')
            out = paddle.incubate.segment_sum(data, segment_ids)
            #Outputs: [[4., 4., 4.], [4., 5., 6.]]

    """
    if in_dygraph_mode():
        # The op returns two values; only the first is exposed to callers.
        out, _ = _C_ops.segment_pool(data, segment_ids, 'pooltype', "SUM")
        return out

    # Input validation is only performed on the static-graph path.
    check_variable_and_dtype(data, "X", ("float32", "float64"), "segment_pool")
    check_variable_and_dtype(segment_ids, "SegmentIds", ("int32", "int64"),
                             "segment_pool")

    # NOTE: `locals()` forwards data, segment_ids and name to LayerHelper;
    # do not bind new local names above this line.
    helper = LayerHelper("segment_sum", **locals())
    out = helper.create_variable_for_type_inference(dtype=data.dtype)
    # Auxiliary op output; created for the op but not returned.
    summed_ids = helper.create_variable_for_type_inference(dtype=data.dtype)
    helper.append_op(
        type="segment_pool",
        inputs={"X": data,
                "SegmentIds": segment_ids},
        outputs={"Out": out,
                 "SummedIds": summed_ids},
        attrs={"pooltype": "SUM"})
    return out
76+
77+
78+
def segment_mean(data, segment_ids, name=None):
    """
    Segment Mean Operator.

    This operator calculates the mean value of the elements of input `data`
    that share the same index in `segment_ids`.
    It computes a tensor such that $out_i = \\frac{1}{n_i} \\sum_{j} data[j]$
    where sum is over j such that `segment_ids[j] == i` and $n_i$ is the number
    of indices j with `segment_ids[j] == i`.

    Args:
        data (Tensor): A tensor, available data type float32, float64.
        segment_ids (Tensor): A 1-D tensor, which has the same size
            as the first dimension of input data.
            Available data type is int32, int64.
        name (str, optional): Forwarded to ``LayerHelper`` in static-graph
            mode; not used in dygraph mode. Default: None.

    Returns:
        output (Tensor): the reduced result.

    Examples:

        .. code-block:: python

            import paddle
            data = paddle.to_tensor([[1, 2, 3], [3, 2, 1], [4, 5, 6]], dtype='float32')
            segment_ids = paddle.to_tensor([0, 0, 1], dtype='int32')
            out = paddle.incubate.segment_mean(data, segment_ids)
            #Outputs: [[2., 2., 2.], [4., 5., 6.]]

    """
    if in_dygraph_mode():
        # The op returns two values; only the first is exposed to callers.
        out, _ = _C_ops.segment_pool(data, segment_ids, 'pooltype', "MEAN")
        return out

    # Input validation is only performed on the static-graph path.
    check_variable_and_dtype(data, "X", ("float32", "float64"), "segment_pool")
    check_variable_and_dtype(segment_ids, "SegmentIds", ("int32", "int64"),
                             "segment_pool")

    # NOTE: `locals()` forwards data, segment_ids and name to LayerHelper;
    # do not bind new local names above this line.
    helper = LayerHelper("segment_mean", **locals())
    out = helper.create_variable_for_type_inference(dtype=data.dtype)
    # Auxiliary op output; created for the op but not returned.
    summed_ids = helper.create_variable_for_type_inference(dtype=data.dtype)
    helper.append_op(
        type="segment_pool",
        inputs={"X": data,
                "SegmentIds": segment_ids},
        outputs={"Out": out,
                 "SummedIds": summed_ids},
        attrs={"pooltype": "MEAN"})
    return out
127+
128+
129+
def segment_min(data, segment_ids, name=None):
    """
    Segment Min Operator.

    This operator calculates the minimum of the elements of input `data`
    that share the same index in `segment_ids`.
    It computes a tensor such that $out_i = \\min_{j} data_{j}$
    where min is over j such that `segment_ids[j] == i`.

    Args:
        data (Tensor): A tensor, available data type float32, float64.
        segment_ids (Tensor): A 1-D tensor, which has the same size
            as the first dimension of input data.
            Available data type is int32, int64.
        name (str, optional): Forwarded to ``LayerHelper`` in static-graph
            mode; not used in dygraph mode. Default: None.

    Returns:
        output (Tensor): the reduced result.

    Examples:

        .. code-block:: python

            import paddle
            data = paddle.to_tensor([[1, 2, 3], [3, 2, 1], [4, 5, 6]], dtype='float32')
            segment_ids = paddle.to_tensor([0, 0, 1], dtype='int32')
            out = paddle.incubate.segment_min(data, segment_ids)
            #Outputs: [[1., 2., 1.], [4., 5., 6.]]

    """
    if in_dygraph_mode():
        # The op returns two values; only the first is exposed to callers.
        out, _ = _C_ops.segment_pool(data, segment_ids, 'pooltype', "MIN")
        return out

    # Input validation is only performed on the static-graph path.
    check_variable_and_dtype(data, "X", ("float32", "float64"), "segment_pool")
    check_variable_and_dtype(segment_ids, "SegmentIds", ("int32", "int64"),
                             "segment_pool")

    # NOTE: `locals()` forwards data, segment_ids and name to LayerHelper;
    # do not bind new local names above this line.
    helper = LayerHelper("segment_min", **locals())
    out = helper.create_variable_for_type_inference(dtype=data.dtype)
    # Auxiliary op output; created for the op but not returned.
    summed_ids = helper.create_variable_for_type_inference(dtype=data.dtype)
    helper.append_op(
        type="segment_pool",
        inputs={"X": data,
                "SegmentIds": segment_ids},
        outputs={"Out": out,
                 "SummedIds": summed_ids},
        attrs={"pooltype": "MIN"})
    return out
176+
177+
178+
def segment_max(data, segment_ids, name=None):
    """
    Segment Max Operator.

    This operator calculates the maximum of the elements of input `data`
    that share the same index in `segment_ids`.
    It computes a tensor such that $out_i = \\max_{j} data_{j}$
    where max is over j such that `segment_ids[j] == i`.

    Args:
        data (Tensor): A tensor, available data type float32, float64.
        segment_ids (Tensor): A 1-D tensor, which has the same size
            as the first dimension of input data.
            Available data type is int32, int64.
        name (str, optional): Forwarded to ``LayerHelper`` in static-graph
            mode; not used in dygraph mode. Default: None.

    Returns:
        output (Tensor): the reduced result.

    Examples:

        .. code-block:: python

            import paddle
            data = paddle.to_tensor([[1, 2, 3], [3, 2, 1], [4, 5, 6]], dtype='float32')
            segment_ids = paddle.to_tensor([0, 0, 1], dtype='int32')
            out = paddle.incubate.segment_max(data, segment_ids)
            #Outputs: [[3., 2., 3.], [4., 5., 6.]]

    """
    if in_dygraph_mode():
        # The op returns two values; only the first is exposed to callers.
        out, _ = _C_ops.segment_pool(data, segment_ids, 'pooltype', "MAX")
        return out

    # Input validation is only performed on the static-graph path.
    check_variable_and_dtype(data, "X", ("float32", "float64"), "segment_pool")
    check_variable_and_dtype(segment_ids, "SegmentIds", ("int32", "int64"),
                             "segment_pool")

    # NOTE: `locals()` forwards data, segment_ids and name to LayerHelper;
    # do not bind new local names above this line.
    helper = LayerHelper("segment_max", **locals())
    out = helper.create_variable_for_type_inference(dtype=data.dtype)
    # Auxiliary op output; created for the op but not returned.
    summed_ids = helper.create_variable_for_type_inference(dtype=data.dtype)
    helper.append_op(
        type="segment_pool",
        inputs={"X": data,
                "SegmentIds": segment_ids},
        outputs={"Out": out,
                 "SummedIds": summed_ids},
        attrs={"pooltype": "MAX"})
    return out

python/setup.py.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,7 @@ packages=['paddle',
162162
'paddle.incubate.optimizer',
163163
'paddle.incubate.checkpoint',
164164
'paddle.incubate.operators',
165+
'paddle.incubate.tensor',
165166
'paddle.distributed.fleet',
166167
'paddle.distributed.fleet.base',
167168
'paddle.distributed.fleet.elastic',

0 commit comments

Comments
 (0)