Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 62 additions & 42 deletions python/cuml/cuml/manifold/t_sne.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,9 @@ cdef extern from "cuml/manifold/tsne.h" namespace "ML" nogil:

# Changed in scikit-learn version 1.5: Parameter name changed from n_iter to max_iter.
if Version(sklearn.__version__) >= Version("1.5.0"):
_SKLEARN_N_ITER_PARAM = "max_iter"
_SKLEARN_MAX_ITER_PARAM = "max_iter"
else:
_SKLEARN_N_ITER_PARAM = "n_iter"
_SKLEARN_MAX_ITER_PARAM = "n_iter"

_SUPPORTED_METRICS = {
"l2": DistanceType.L2SqrtExpanded,
Expand Down Expand Up @@ -173,7 +173,6 @@ cdef _init_params(self, int n_samples, TSNEParams &params):
adaptive_learning = _check_mapping(
self, "learning_rate_method", {"adaptive": True, "none": False, None: False}
)
n_iter = _check_numeric(self, "n_iter", gt=0)
min_grad_norm = _check_numeric(self, "min_grad_norm", ge=0)
angle = _check_numeric(self, "angle", ge=0, le=1)
n_neighbors = _check_numeric(self, "n_neighbors", gt=0)
Expand All @@ -188,7 +187,19 @@ cdef _init_params(self, int n_samples, TSNEParams &params):
if n_samples < 2:
raise ValueError("TSNE requires >= 2 samples")

exaggeration_iter = min(exaggeration_iter, self.n_iter)
if self.n_iter != "deprecated":
warnings.warn(
(
"`n_iter` was deprecated in 25.12 and will be removed in 26.02. "
"Please use `max_iter` instead."
),
FutureWarning,
)
max_iter = _check_numeric(self, "n_iter", gt=0)
else:
max_iter = _check_numeric(self, "max_iter", gt=0)

exaggeration_iter = min(exaggeration_iter, max_iter)
if n_neighbors > 1023:
warnings.warn(
f"n_neighbors ({n_neighbors}) should be < 1024, "
Expand Down Expand Up @@ -237,7 +248,7 @@ cdef _init_params(self, int n_samples, TSNEParams &params):
params.min_gain = 0.01
params.pre_learning_rate = pre_learning_rate
params.post_learning_rate = post_learning_rate
params.max_iter = n_iter
params.max_iter = max_iter
params.min_grad_norm = min_grad_norm
params.pre_momentum = pre_momentum
params.post_momentum = post_momentum
Expand Down Expand Up @@ -283,11 +294,18 @@ class TSNE(Base,
learning_rate : float (default 200.0)
The learning rate usually between (10, 1000). If this is too high,
t-SNE could look like a cloud / ball of points.
n_iter : int (default 1000)
max_iter : int (default 1000)
The more epochs, the more stable/accurate the final embedding.
n_iter_without_progress : int (default 300)
Currently unused. When the KL Divergence becomes too small after some
iterations, terminate t-SNE early.
n_iter : int (default 1000)

.. deprecated:: 25.12
``n_iter`` has been renamed to ``max_iter`` to better match the
API of ``sklearn.manifold.TSNE``. ``n_iter`` is deprecated in favor
of ``max_iter`` and will be removed in 26.02.

min_grad_norm : float (default 1e-07)
The minimum gradient norm for when t-SNE will terminate early.
Used in the 'exact' and 'fft' algorithms. Consider reducing if
Expand Down Expand Up @@ -417,8 +435,9 @@ class TSNE(Base,
"early_exaggeration",
"late_exaggeration",
"learning_rate",
"n_iter",
"max_iter",
"n_iter_without_progress",
"n_iter",
"min_grad_norm",
"metric",
"metric_params",
Expand Down Expand Up @@ -469,8 +488,8 @@ class TSNE(Base,
# For now have `learning_rate="auto"` just use cuml's default
params["learning_rate"]: model.learning_rate

if (max_iter := getattr(model, _SKLEARN_N_ITER_PARAM, None)) is not None:
params["n_iter"] = max_iter
if (max_iter := getattr(model, _SKLEARN_MAX_ITER_PARAM, None)) is not None:
params["max_iter"] = max_iter

return params

Expand All @@ -489,7 +508,7 @@ class TSNE(Base,
"init": self.init,
"random_state": self.random_state,
"method": method,
_SKLEARN_N_ITER_PARAM: self.n_iter,
_SKLEARN_MAX_ITER_PARAM: self.max_iter,
}
return params

Expand All @@ -511,44 +530,45 @@ class TSNE(Base,
**super()._attrs_to_cpu(model)
}

def __init__(self, *,
n_components=2,
perplexity=30.0,
early_exaggeration=12.0,
late_exaggeration=1.0,
learning_rate=200.0,
n_iter=1000,
n_iter_without_progress=300,
min_grad_norm=1e-07,
metric='euclidean',
metric_params=None,
init='random',
random_state=None,
method='fft',
angle=0.5,
n_neighbors=90,
perplexity_max_iter=100,
exaggeration_iter=250,
pre_momentum=0.5,
post_momentum=0.8,
learning_rate_method='adaptive',
square_distances=True,
precomputed_knn=None,
verbose=False,
handle=None,
output_type=None):

super().__init__(handle=handle,
verbose=verbose,
output_type=output_type)

def __init__(
self,
*,
n_components=2,
perplexity=30.0,
early_exaggeration=12.0,
late_exaggeration=1.0,
learning_rate=200.0,
max_iter=1000,
n_iter_without_progress=300,
n_iter="deprecated",
min_grad_norm=1e-07,
metric='euclidean',
metric_params=None,
init='random',
random_state=None,
method='fft',
angle=0.5,
n_neighbors=90,
perplexity_max_iter=100,
exaggeration_iter=250,
pre_momentum=0.5,
post_momentum=0.8,
learning_rate_method='adaptive',
square_distances=True,
precomputed_knn=None,
verbose=False,
handle=None,
output_type=None,
):
super().__init__(handle=handle, verbose=verbose, output_type=output_type)
self.n_components = n_components
self.perplexity = perplexity
self.early_exaggeration = early_exaggeration
self.late_exaggeration = late_exaggeration
self.learning_rate = learning_rate
self.n_iter = n_iter
self.max_iter = max_iter
self.n_iter_without_progress = n_iter_without_progress
self.n_iter = n_iter
self.min_grad_norm = min_grad_norm
self.metric = metric
self.metric_params = metric_params
Expand Down
14 changes: 13 additions & 1 deletion python/cuml/tests/test_tsne.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ def test_tsne_large(nrows, ncols, method):
tsne = TSNE(
random_state=1,
exaggeration_iter=1,
n_iter=2,
max_iter=2,
method=method,
min_grad_norm=1e-12,
)
Expand Down Expand Up @@ -430,5 +430,17 @@ def test_tsne_n_iter(algorithm):
n_samples=1000, n_features=64, centers=5, random_state=42
)
model = TSNE(n_components=2, random_state=42).fit(X)
assert model.n_iter_ > 0
assert model.n_iter_ <= model.max_iter


def test_tsne_n_iter_deprecated():
X, _ = make_blobs(
n_samples=1000, n_features=64, centers=5, random_state=42
)
model = TSNE(n_components=2, random_state=42, n_iter=2)
with pytest.warns(FutureWarning, match="n_iter"):
model.fit(X)

assert model.n_iter_ > 0
assert model.n_iter_ <= model.n_iter