Skip to content

Commit 47953ac

Browse files
【Hackathon 5th No.35】为 Paddle 新增 histogramdd API (#57880)
* add histogramdd api * add some tests for different bins type * fix * clean api func * fix atol * add some type check && add error test * fix codestyle * detail test func name * codestyle * modify range to ranges to avoid conflict, modify sample to x * modify static test to random test * coverage * Update python/paddle/tensor/linalg.py Co-authored-by: zachary sun <[email protected]> * Update python/paddle/tensor/linalg.py Co-authored-by: zachary sun <[email protected]> * Update python/paddle/tensor/linalg.py Co-authored-by: zachary sun <[email protected]> * fix doc * fix doc --------- Co-authored-by: zachary sun <[email protected]>
1 parent b12eb1e commit 47953ac

File tree

4 files changed

+717
-0
lines changed

4 files changed

+717
-0
lines changed

python/paddle/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@
156156
cholesky,
157157
bmm,
158158
histogram,
159+
histogramdd,
159160
bincount,
160161
mv,
161162
eigvalsh,
@@ -695,6 +696,7 @@
695696
'rot90',
696697
'bincount',
697698
'histogram',
699+
'histogramdd',
698700
'multiplex',
699701
'CUDAPlace',
700702
'empty',

python/paddle/tensor/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@
7979
eigvals,
8080
eigvalsh,
8181
histogram,
82+
histogramdd,
8283
householder_product,
8384
lstsq,
8485
lu,
@@ -435,6 +436,7 @@
435436
'cholesky',
436437
'bmm',
437438
'histogram',
439+
'histogramdd',
438440
'bincount',
439441
'mv',
440442
'matrix_power',

python/paddle/tensor/linalg.py

Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929

3030
__all__ = []
3131

32+
3233
# Consistent with kDefaultDim from C++ Backend
3334
K_DEFAULT_DIM = 9
3435

@@ -3869,3 +3870,227 @@ def _householder_product(x, tau):
38693870
)
38703871
out = out.reshape(org_x_shape)
38713872
return out
3873+
3874+
3875+
def histogramdd(
    x, bins=10, ranges=None, density=False, weights=None, name=None
):
    r"""
    Computes a multi-dimensional histogram of the values in a tensor.

    Interprets the elements of an input tensor whose innermost dimension has size `N` as a collection of N-dimensional points. Maps each of the points into a set of N-dimensional bins and returns the number of points (or total weight) in each bin.

    Input `x` must be a tensor with at least 2 dimensions. If input has shape `(M, N)`, each of its `M` rows defines a point in N-dimensional space. If input has three or more dimensions, all but the last dimension are flattened.

    Each dimension is independently associated with its own strictly increasing sequence of bin edges. Bin edges may be specified explicitly by passing a sequence of 1D tensors. Alternatively, bin edges may be constructed automatically by passing a sequence of integers specifying the number of equal-width bins in each dimension.

    Args:
        x (Tensor): The input tensor. Its dtype must be float32 or float64.
        bins (Tensor[], int[], or int): If Tensor[] (a list or tuple of 1D tensors), defines the sequences of bin edges; each edge tensor must have the same dtype as ``x``. If int[] (a list or tuple of ints), defines the number of equal-width bins in each dimension. If int, defines the number of equal-width bins for all dimensions.
        ranges (sequence of float, optional): Defines the leftmost and rightmost bin edges in each dimension, flattened as ``[min_0, max_0, min_1, max_1, ...]``. If None, the minimum and maximum of the data in each dimension are used as the leftmost and rightmost edges. Only used when ``bins`` is given as int or int[].
        density (bool, optional): If False (default), the result will contain the count (or total weight) in each bin. If True, each count (weight) is divided by the total count (total weight), then divided by the volume of its associated bin.
        weights (Tensor, optional): By default, each value in the input has weight 1. If a weight tensor is passed, each N-dimensional coordinate in input contributes its associated weight towards its bin's result. The weight tensor should have the same shape as the input tensor excluding its innermost dimension N, and the same dtype as the input tensor.
        name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.

    Returns:
        N-dimensional Tensor containing the values of the histogram. ``bin_edges(Tensor[])``, sequence of N 1D Tensors containing the bin edges.

    Raises:
        AssertionError: If ``x`` has fewer than 2 dimensions, if the shape or dtype of ``weights`` or of the bin edges does not match ``x``, or if the length of ``ranges`` or ``bins`` does not match the innermost dimension of ``x``.
        ValueError: If ``bins`` is not an int, list or tuple, or if a list of bin counts contains a non-int element.

    Examples:
        .. code-block:: python
            :name: example1

            >>> import paddle
            >>> x = paddle.to_tensor([[0., 1.], [1., 0.], [2.,0.], [2., 2.]])
            >>> bins = [3,3]
            >>> weights = paddle.to_tensor([1., 2., 4., 8.])
            >>> paddle.histogramdd(x, bins=bins, weights=weights)
            (Tensor(shape=[3, 3], dtype=float32, place=Place(gpu:0), stop_gradient=True,
                   [[0., 1., 0.],
                    [2., 0., 0.],
                    [4., 0., 8.]]), [Tensor(shape=[4], dtype=float32, place=Place(gpu:0), stop_gradient=True,
                   [0.        , 0.66666669, 1.33333337, 2.        ]), Tensor(shape=[4], dtype=float32, place=Place(gpu:0), stop_gradient=True,
                   [0.        , 0.66666669, 1.33333337, 2.        ])])

        .. code-block:: python
            :name: example2

            >>> import paddle
            >>> y = paddle.to_tensor([[0., 0.], [1., 1.], [2., 2.]])
            >>> bins = [2,2]
            >>> ranges = [0., 1., 0., 1.]
            >>> density = True
            >>> paddle.histogramdd(y, bins=bins, ranges=ranges, density=density)
            (Tensor(shape=[2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=True,
                   [[2., 0.],
                    [0., 2.]]), [Tensor(shape=[3], dtype=float32, place=Place(gpu:0), stop_gradient=True,
                   [0.        , 0.50000000, 1.        ]), Tensor(shape=[3], dtype=float32, place=Place(gpu:0), stop_gradient=True,
                   [0.        , 0.50000000, 1.        ])])


    """

    def __check_x(x):
        assert (
            len(x.shape) >= 2
        ), "input x must be a tensor with at least 2 dimensions."
        check_variable_and_dtype(
            x,
            'x',
            [
                'float32',
                'float64',
            ],
            'histogramdd',
        )

    def __check_bins(bins, x):  # when Tensor[], check dtype
        for bins_tensor in bins:
            bins_tensor = paddle.to_tensor(bins_tensor)
            check_variable_and_dtype(
                bins_tensor,
                'bins',
                [
                    'float32',
                    'float64',
                ],
                'histogramdd',
            )
            assert (
                bins_tensor.dtype == x.dtype
            ), "When bins is Tensor[], the dtype of bins must be the same as x.\n"

    def __check_weights(x, weights):
        if weights is None:
            return
        x_shape, weights_shape = x.shape, weights.shape
        assert len(x_shape) == len(weights_shape) + 1, (
            "if weight tensor is provided,"
            "it should have the same shape as the input tensor excluding its innermost dimension.\n"
        )
        for i, _ in enumerate(weights_shape):
            assert weights_shape[i] == x_shape[i], (
                "if weight tensor is provided,"
                "it should have the same shape as the input tensor excluding its innermost dimension.\n"
            )
        check_variable_and_dtype(
            weights,
            'weights',
            [
                'float32',
                'float64',
            ],
            'histogramdd',
        )
        assert (
            weights.dtype == x.dtype
        ), "The dtype of weights must be the same as x.\n"

    def __check_ranges(D, ranges):
        if ranges is None:
            return
        check_type(ranges, 'ranges', (list, tuple), 'histogramdd')
        assert D * 2 == len(
            ranges
        ), "The length of ranges list must be %d\n" % (D * 2)

    def __is_edge_like(item):
        # Array-like objects with at least one axis expose `shape` and
        # `dtype`; Python scalars and nested Python lists do not, and
        # zero-dimensional scalars are excluded by the length test.
        return (
            hasattr(item, 'shape')
            and hasattr(item, 'dtype')
            and len(item.shape) > 0
        )

    check_type(density, 'density', bool, 'histogramdd')

    __check_x(x)
    # weights
    __check_weights(x, weights)
    # D is the dimensionality of each point, N the number of points once all
    # leading axes have been flattened.
    D = x.shape[-1]
    reshaped_input = x.reshape([-1, D])
    N = reshaped_input.shape[0]
    reshaped_weights = None
    if weights is not None:
        weights = weights.astype(x.dtype)
        reshaped_weights = weights.reshape([N])
        assert reshaped_weights.shape[0] == N, (
            "The size of weight must be %d" % N
        )
    # ranges: one (leftmost, rightmost) row per dimension
    __check_ranges(D, ranges)
    if ranges is None:
        ranges = paddle.zeros([D, 2], dtype=x.dtype)
        maxv = paddle.max(reshaped_input, axis=0).reshape([-1])
        minv = paddle.min(reshaped_input, axis=0).reshape([-1])

        if paddle.in_dynamic_mode():
            ranges[:, 0] = minv
            ranges[:, 1] = maxv
        else:
            ranges = paddle.static.setitem(ranges, (slice(None), 0), minv)
            ranges = paddle.static.setitem(ranges, (slice(None), 1), maxv)
    else:
        ranges = paddle.to_tensor(ranges, dtype=x.dtype).reshape([D, 2])

    # bins to edges
    if isinstance(bins, int):
        bins = [bins] * D

    # Decide how to read `bins` from its elements rather than from the
    # container type, so that both lists and tuples are accepted for either
    # form, as the documented contract (Tensor[], int[], or int) promises.
    if isinstance(bins, list):
        # A list is read as explicit edges only when every entry is
        # tensor-like; any other list keeps the int[] validation below.
        bins_are_edges = len(bins) > 0 and all(
            __is_edge_like(item) for item in bins
        )
    elif isinstance(bins, tuple):
        # A tuple is read as bin counts only when every entry is an int;
        # any other tuple keeps the explicit-edges handling.
        bins_are_edges = not all(isinstance(item, int) for item in bins)
    else:
        raise ValueError("Input bins must be Tensor[], int[], or int.")

    edges = []
    dedges = []
    if not bins_are_edges:  # int[]: equal-width bins over `ranges`
        assert len(bins) == D, (
            "The length of bins must be %d when bins is a list.\n" % D
        )
        for idx, r in enumerate(ranges):
            if not isinstance(bins[idx], int):
                raise ValueError(
                    "The type of %d-th element in bins list must be int." % idx
                )
            e = paddle.linspace(r[0], r[1], bins[idx] + 1, x.dtype)
            edges.append(e)
            dedges.append(e.diff())
    else:  # Tensor[]: one explicit edge sequence per innermost dimension
        assert len(bins) == D, (
            "The length of bins must be %d when bins is a sequence of Tensors.\n"
            % D
        )
        __check_bins(bins, x)
        for edge_seq in bins:
            edge_tensor = paddle.to_tensor(edge_seq)
            edges.append(edge_tensor)
            dedges.append(edge_tensor.diff())

    # Each axis gets two extra slots (one below the first edge, one above the
    # last) so that out-of-range points land in an outlier bin that is
    # trimmed away afterwards.
    hist_shape = [edge.shape[0] + 1 for edge in edges]
    index_list = []
    # edges shape: [D, linspaced]
    # index_list shape: [D, N]
    for idx, edge in enumerate(edges):
        edge = paddle.to_tensor(edge)
        index_list.append(
            paddle.searchsorted(edge, reshaped_input[:, idx], right=True)
        )
    index_list = paddle.to_tensor(index_list)
    # Points lying exactly on the rightmost edge were pushed into the upper
    # outlier slot by `right=True`; move them back into the last real bin.
    for i in range(D):
        on_edge = reshaped_input[:, i] == edges[i][-1]
        if paddle.in_dynamic_mode():
            index_list[i][on_edge] -= 1
        else:
            index_list = paddle.static.setitem(
                index_list, (i, on_edge), index_list[i][on_edge] - 1
            )
    index_list = tuple(index_list)
    # Look-up table mapping a D-dimensional bin index to its flat position,
    # so the counting itself can be delegated to `bincount`.
    total_bins = paddle.to_tensor(hist_shape).prod()
    lut = paddle.arange(total_bins).reshape(hist_shape)
    flattened_index = lut[index_list]
    hist = paddle.bincount(
        flattened_index,
        reshaped_weights,
        minlength=total_bins,
    )
    hist = hist.reshape(hist_shape)
    hist = hist.astype(x.dtype)

    # Drop the outlier slots on both ends of every axis.
    core = D * (slice(1, -1),)
    hist = hist[core]

    if density:
        # Total count (or weight) is taken before scaling by bin widths.
        s = hist.sum()
        for i in range(D):
            shape = D * [1]
            shape[i] = hist_shape[i] - 2
            hist = hist / dedges[i].reshape(shape)
        hist /= s

    return (hist, edges)

0 commit comments

Comments
 (0)