Skip to content
Merged
Show file tree
Hide file tree
Changes from 36 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
d25d385
WIP: Implement hypothesis strategies and tests for arrays
csadorf Nov 21, 2022
454a073
Remove explicit testing of numba arrays.
csadorf Nov 22, 2022
b5de8d4
Continue implementation.
csadorf Nov 22, 2022
716e02f
Make create_cuml_array_input public function.
csadorf Nov 22, 2022
5ff60ad
Hypothesize test_get_set_item.
csadorf Nov 22, 2022
7665b23
Raise ValueError for invalid input to cuml_array_shapes.
csadorf Nov 22, 2022
f98739b
The cuml_array_shapes() strategy also returns integers.
csadorf Nov 22, 2022
5b990eb
Only run standard number of examples.
csadorf Nov 22, 2022
3f35726
Hypothesize test_create_empty.
csadorf Nov 22, 2022
03d0f0e
Reenable DeviceBuffer check since #4332 is resolved.
csadorf Nov 22, 2022
0b81196
Remove obsolete py<38 compatibility work-around.
csadorf Nov 22, 2022
b8774ac
Hypothesize test_create_* tests.
csadorf Nov 22, 2022
b214e77
Improve shape normalization and inspection.
csadorf Nov 22, 2022
b369b90
Hypothesize test_output test.
csadorf Nov 22, 2022
a698a23
Hypothesize test_output_dtype test.
csadorf Nov 22, 2022
573916a
Hypothesize test_cuda_array_interface test.
csadorf Nov 22, 2022
ccf730d
Hypothesize test_serialize test.
csadorf Nov 22, 2022
751391a
Hypothesize test_cumlary_binops and test_deepcopy tests.
csadorf Nov 22, 2022
0f95380
Improve cuml_arrays strategy (currently not used).
csadorf Nov 22, 2022
0782b4e
Cleanup test_array test module.
csadorf Nov 22, 2022
4ba3fc3
Use less rigorous mulit-dimension check for init_array.
csadorf Nov 22, 2022
a42da0a
Move test of array_inputs strategies into test_strategies module.
csadorf Nov 22, 2022
c9e02fa
Implement test_get_set_item with cuml_array_inputs.
csadorf Nov 24, 2022
cec283e
Implement test_output with cuml_array_inputs.
csadorf Nov 24, 2022
85ec179
Fix multidim check for test_output_dtype.
csadorf Nov 24, 2022
7051f93
Implement test_cuda_array_interface test with cuml_array_inputs.
csadorf Nov 24, 2022
9b3b7b4
Implement test_serialize with cuml_array_inputs.
csadorf Nov 24, 2022
d7e078b
Implement test_pickle with cuml_array_inputs.
csadorf Nov 24, 2022
49e2bb9
Implement test_deepcopy with cuml_array_inputs.
csadorf Nov 24, 2022
1f14486
Document new strategies.
csadorf Nov 28, 2022
0944b43
Remove obsolete None (default) value from valid cuml array input types.
csadorf Nov 29, 2022
732c7f7
Adjust cuml_array_shapes() max_side default value.
csadorf Nov 29, 2022
87c28c0
Apply isort and black formatting.
csadorf Nov 29, 2022
ec5f957
Remove _CUML_ARRAY_OUTPUT_DTYPES constant.
csadorf Nov 29, 2022
5c1d5b3
Remove todo comment (captured in discussion).
csadorf Nov 29, 2022
a399e85
The cuml_array_inputs strategy generates more arbitrary arrays.
csadorf Nov 30, 2022
d58b661
Set hypothesis deadline=None for test_array_init_from_bytes test.
csadorf Dec 8, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
277 changes: 275 additions & 2 deletions python/cuml/testing/strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,285 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import cudf
import cupy as cp
import numpy as np
from hypothesis import assume
from hypothesis.extra.numpy import arrays, floating_dtypes
from hypothesis.strategies import composite, integers, just, none, one_of
from hypothesis.extra.numpy import array_shapes, arrays, floating_dtypes
from hypothesis.strategies import (
composite,
integers,
just,
none,
one_of,
sampled_from,
)
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

from cuml.common.array import CumlArray

# Names of the input container types sampled by cuml_array_input_types() and
# accepted by create_cuml_array_input() / cuml_array_inputs().
_CUML_ARRAY_INPUT_TYPES = ["numpy", "cupy", "series"]


# Data types sampled by cuml_array_dtypes() and cuml_array_output_dtypes().
_CUML_ARRAY_DTYPES = [
np.float16,
np.float32,
np.float64,
np.int8,
np.int16,
np.int32,
np.int64,
np.uint8,
np.uint16,
np.uint32,
np.uint64,
]

# Memory layout specifiers sampled by cuml_array_orders().
_CUML_ARRAY_ORDERS = ["F", "C"]


# Output type names sampled by cuml_array_output_types().
_CUML_ARRAY_OUTPUT_TYPES = [
"cudf",
"cupy",
"dataframe",
"numba",
"numpy",
"series",
]


# Data types for which "series" inputs are discarded (via assume()) by the
# input generators in this module.
UNSUPPORTED_CUDF_DTYPES = [
np.uint8,
np.uint16,
np.uint32,
np.uint64,
np.float16,
]


@composite
def cuml_array_input_types(draw):
    """Draws one of the supported cuml array input types."""
    input_type_choices = sampled_from(_CUML_ARRAY_INPUT_TYPES)
    return draw(input_type_choices)


@composite
def cuml_array_output_types(draw):
    """Draws one of the output types supported by cuml arrays."""
    output_type_choices = sampled_from(_CUML_ARRAY_OUTPUT_TYPES)
    return draw(output_type_choices)


@composite
def cuml_array_output_dtypes(draw):
    """
    Draws one of the output dtypes supported by cuml arrays.

    The values are sampled from the same list as cuml_array_dtypes().
    """
    output_dtype_choices = sampled_from(_CUML_ARRAY_DTYPES)
    return draw(output_dtype_choices)


@composite
def cuml_array_dtypes(draw):
    """Draws one of the supported cuml array dtypes."""
    dtype_choices = sampled_from(_CUML_ARRAY_DTYPES)
    return draw(dtype_choices)


@composite
def cuml_array_orders(draw):
    """Draws one of the supported cuml array orders ("F" or "C")."""
    order_choices = sampled_from(_CUML_ARRAY_ORDERS)
    return draw(order_choices)


@composite
def cuml_array_shapes(
    draw, *, min_dims=1, max_dims=2, min_side=1, max_side=None
):
    """
    Generates cuml array shapes.

    A drawn shape is either a tuple of side lengths or a single integer
    size.

    See also: hypothesis.extra.numpy.array_shapes()

    Parameters
    ----------
    min_dims: int, default=1
        Returned tuple shapes will have at least this number of dimensions.
    max_dims: int, default=2
        Returned tuple shapes will have at most this number of dimensions.
    min_side: int, default=1
        Returned shapes will have at least this size in any dimension.
    max_side: int | None, default=None
        Returned shapes will have at most this size in any dimension.
        When None, min_side + 9 is used.

    Returns
    -------
    Shapes for cuml array inputs.

    Raises
    ------
    ValueError
        If the arguments violate 1 <= min_dims <= max_dims or
        0 < min_side < max_side.
    """
    if max_side is None:
        max_side = min_side + 9

    dims_are_valid = 1 <= min_dims <= max_dims
    if not dims_are_valid:
        raise ValueError(
            "Arguments violate condition 1 <= min_dims <= max_dims."
        )

    sides_are_valid = 0 < min_side < max_side
    if not sides_are_valid:
        raise ValueError(
            "Arguments violate condition 0 < min_side < max_side."
        )

    # Tuple-valued shapes, e.g. (3,) or (4, 2).
    tuple_shapes = array_shapes(
        min_dims=min_dims,
        max_dims=max_dims,
        min_side=min_side,
        max_side=max_side,
    )
    # Bare integer sizes, bounded by the same side limits.
    integer_sizes = integers(min_value=min_side, max_value=max_side)

    return draw(one_of(tuple_shapes, integer_sizes))


def create_cuml_array_input(input_type, dtype, shape, order):
    """
    Creates a valid cuml array input filled with ones.

    Parameters
    ----------
    input_type: str | None
        Valid input types are "numpy", "cupy", "series". None is treated
        as "cupy".
    dtype: Data type specifier
        A numpy/cupy compatible data type, e.g., numpy.float64.
    shape: int | tuple[int]
        Dimensions of the array to generate.
    order : str in {'C', 'F'}
        Order of arrays to generate, either F- or C-major.

    Returns
    -------
    A cuml array input array.

    Raises
    ------
    ValueError
        If input_type is not one of the valid input types.

    Notes
    -----
    Calls hypothesis' assume() to discard "series" inputs whose dtype is
    listed in UNSUPPORTED_CUDF_DTYPES or whose shape has more than one axis
    of length greater than 1.
    """
    if input_type is None:
        input_type = "cupy"

    # Only axes with more than one element count towards dimensionality
    # here, so e.g. (5, 1) is not treated as multidimensional.
    if isinstance(shape, tuple):
        multidimensional = sum(1 for extent in shape if extent > 1) > 1
    else:
        multidimensional = False

    series_unsupported = input_type == "series" and (
        dtype in UNSUPPORTED_CUDF_DTYPES or multidimensional
    )
    assume(not series_unsupported)

    device_array = cp.ones(shape, dtype=dtype, order=order)

    if input_type == "cupy":
        return device_array

    if input_type == "series":
        return cudf.Series(device_array)

    if input_type == "numpy":
        host_array = cp.asnumpy(device_array)
        return np.array(host_array, dtype=dtype, order=order)

    raise ValueError(
        "The value for 'input_type' must be "
        f"one of {', '.join(_CUML_ARRAY_INPUT_TYPES)}."
    )


@composite
def cuml_array_inputs(
    draw,
    input_types=cuml_array_input_types(),
    dtypes=cuml_array_dtypes(),
    shapes=cuml_array_shapes(),
    orders=cuml_array_orders(),
):
    """
    Generates valid inputs for cuml arrays.

    Parameters
    ----------
    input_types: SearchStrategy[("numpy", "cupy", "series")],
            default=cuml_array_input_types()
        A search strategy for the type of array input.
    dtypes: SearchStrategy[np.dtype], default=cuml_array_dtypes()
        A search strategy for a numpy/cupy compatible data type.
    shapes: SearchStrategy[int | tuple[int]], default=cuml_array_shapes()
        A search strategy for array shapes.
    orders : SearchStrategy[str in {'C', 'F'}], default=cuml_array_orders()
        A search strategy for array orders.

    Returns
    -------
    A strategy for valid cuml array inputs.

    Raises
    ------
    ValueError
        If a drawn input type is not one of the valid input types.
    """
    # The draw order is part of the strategy's behavior; keep it stable.
    input_type = draw(input_types)
    dtype = draw(dtypes)
    shape = draw(shapes)
    order = draw(orders)

    # Any tuple shape with more than one axis counts as multidimensional.
    multidimensional = isinstance(shape, tuple) and len(shape) > 1
    series_unsupported = input_type == "series" and (
        dtype in UNSUPPORTED_CUDF_DTYPES or multidimensional
    )
    assume(not series_unsupported)

    host_data = draw(arrays(dtype=dtype, shape=shape))

    if input_type == "series":
        generated = cudf.Series(host_data)
    elif input_type == "cupy":
        generated = cp.array(host_data, dtype=dtype, order=order)
    elif input_type == "numpy":
        generated = np.asarray(host_data, order=order)
    else:
        raise ValueError(
            "The value for 'input_type' must be "
            f"one of {', '.join(_CUML_ARRAY_INPUT_TYPES)}."
        )

    # Cupy currently does not support masked arrays, so discard any input
    # whose __cuda_array_interface__ reports a mask.
    interface = getattr(generated, '__cuda_array_interface__', {})
    assume(interface.get('mask') is None)

    return generated


@composite
def cuml_arrays(
    draw,
    input_types=cuml_array_input_types(),
    dtypes=cuml_array_dtypes(),
    shapes=cuml_array_shapes(),
    orders=cuml_array_orders(),
):
    """
    Generates cuml arrays.

    Parameters
    ----------
    input_types: SearchStrategy[("numpy", "cupy", "series")],
            default=cuml_array_input_types()
        A search strategy for the type of array input.
    dtypes: SearchStrategy[np.dtype], default=cuml_array_dtypes()
        A search strategy for a numpy/cupy compatible data type.
    shapes: SearchStrategy[int | tuple[int]], default=cuml_array_shapes()
        A search strategy for array shapes.
    orders : SearchStrategy[str in {'C', 'F'}], default=cuml_array_orders()
        A search strategy for array orders.

    Returns
    -------
    A strategy for cuml arrays.
    """
    # Draw in a fixed order: input type, dtype, shape, then order.
    input_type = draw(input_types)
    dtype = draw(dtypes)
    shape = draw(shapes)
    order = draw(orders)

    array_input = create_cuml_array_input(input_type, dtype, shape, order)
    return CumlArray(data=array_input)


@composite
def standard_datasets(
Expand Down
22 changes: 22 additions & 0 deletions python/cuml/testing/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import numpy as np
import pandas as pd
from copy import deepcopy
from itertools import dropwhile

from numba import cuda
from numbers import Number
Expand Down Expand Up @@ -758,3 +759,24 @@ def svm_array_equal(a, b, tol=1e-6, relative_diff=True, report_summary=False):
print('Avgdiff:', np.mean(diff), 'stddiyy:', np.std(diff), 'avgval:',
np.mean(b))
return equal


def normalized_shape(shape):
    """Wrap an integer shape in a 1-tuple; return any other shape as is."""
    if isinstance(shape, int):
        return (shape,)
    return shape


def squeezed_shape(shape):
    """Return shape as a tuple with all trailing axes of length 1 removed.

    Similar to, but not exactly like np.squeeze(): only axes at the end of
    the shape are dropped, leading and interior axes of length 1 are kept.
    """
    # Walk the shape back to front and skip the leading run of ones, which
    # corresponds to the trailing axes of the original shape.
    back_to_front = dropwhile(lambda extent: extent == 1, reversed(shape))
    # Flip the remainder back into the original axis order.
    return tuple(back_to_front)[::-1]


def series_squeezed_shape(shape):
    """Drop every axis of length 1 from shape, keeping at least one axis.

    A falsy shape (e.g. an empty tuple) yields (). Otherwise all axes of
    length 1 are removed; if nothing remains, (1,) is returned.
    """
    if not shape:
        return ()

    non_unit_axes = tuple(
        extent for extent in normalized_shape(shape) if extent != 1
    )
    return non_unit_axes if non_unit_axes else (1,)
Loading