From 9509081b1ecfe5a5e0036a8bd6e293c0f291a833 Mon Sep 17 00:00:00 2001 From: Jim Crist-Harif Date: Mon, 29 Sep 2025 13:56:25 -0500 Subject: [PATCH] Ensure finite values passed to SpectralEmbedding The C++ implementation doesn't support non-finite values. Sometimes this results in a failure-to-converge error, sometimes this results in an illegal memory access (and downstream failures eventually leading to potential segfaults). --- python/cuml/cuml/manifold/spectral_embedding.pyx | 13 ++++++++++--- python/cuml/tests/test_spectral_embedding.py | 9 +++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/python/cuml/cuml/manifold/spectral_embedding.pyx b/python/cuml/cuml/manifold/spectral_embedding.pyx index 2f06702e26..3f850eabab 100644 --- a/python/cuml/cuml/manifold/spectral_embedding.pyx +++ b/python/cuml/cuml/manifold/spectral_embedding.pyx @@ -20,10 +20,10 @@ import scipy.sparse as sp from pylibraft.common.handle import Handle import cuml -from cuml.common import input_to_cuml_array from cuml.common.array_descriptor import CumlArrayDescriptor from cuml.internals.array import CumlArray from cuml.internals.base import Base +from cuml.internals.input_utils import input_to_cupy_array from cuml.internals.interop import ( InteropMixin, UnsupportedOnGPU, @@ -154,9 +154,10 @@ def spectral_embedding(A, cdef device_resources *h = handle.getHandle() if affinity == "nearest_neighbors": - A = input_to_cuml_array( + A = input_to_cupy_array( A, order="C", check_dtype=np.float32, convert_to_dtype=cp.float32 ).array + isfinite = cp.isfinite(A).all() elif affinity == "precomputed": # Coerce `A` to a canonical float32 COO sparse matrix if cp_sp.issparse(A): @@ -168,6 +169,7 @@ def spectral_embedding(A, else: A = cp_sp.coo_matrix(cp.asarray(A, dtype="float32")) A.sum_duplicates() + isfinite = cp.isfinite(A.data).all() else: raise ValueError( f"`affinity={affinity!r}` is not supported, expected one of " @@ -176,6 +178,11 @@ def spectral_embedding(A, n_samples, n_features = A.shape + if not isfinite: + raise ValueError( + "Input contains NaN or inf; nonfinite values are not supported" + ) + if n_samples < 2: raise ValueError( f"Found array with {n_samples} sample(s) (shape={A.shape}) while a " @@ -220,7 +227,7 @@ def spectral_embedding(A, deref(h), config, make_device_matrix_view[float, int, row_major]( - A.ptr, + A.data.ptr, A.shape[0], A.shape[1], ), diff --git a/python/cuml/tests/test_spectral_embedding.py b/python/cuml/tests/test_spectral_embedding.py index 1515d2619f..8f4dbf0851 100644 --- a/python/cuml/tests/test_spectral_embedding.py +++ b/python/cuml/tests/test_spectral_embedding.py @@ -232,6 +232,15 @@ def test_spectral_embedding_invalid_affinity(): spectral_embedding(X, affinity="oops!") +@pytest.mark.parametrize("value", [float("inf"), float("nan")]) +@pytest.mark.parametrize("affinity", ["nearest_neighbors", "precomputed"]) +def test_spectral_embedding_nonfinite(value, affinity): + X = np.array([[0, 1], [2, 3], [0, value]], dtype="float32") + + with pytest.raises(ValueError, match="nonfinite"): + spectral_embedding(X, affinity=affinity) + + @pytest.mark.parametrize( "input_type,expected_type", [