Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions python/cuml/cuml/accel/_wrappers/sklearn/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,22 @@
#

import cuml.cluster
from cuml.accel.estimator_proxy import ProxyBase
from cuml.accel.estimator_proxy_mixin import ProxyMixin

__all__ = ("KMeans", "DBSCAN")


class KMeans(ProxyMixin, cuml.cluster.KMeans):
pass
class KMeans(ProxyBase):
    """Proxy estimator whose GPU class is ``cuml.cluster.KMeans``.

    NOTE(review): how ``ProxyBase`` picks up ``_gpu_class`` and provides
    ``self._gpu`` / ``self._cpu`` is not visible here -- confirm against
    ``cuml.accel.estimator_proxy``.
    """

    # GPU implementation associated with this proxy.
    _gpu_class = cuml.cluster.KMeans

    def _gpu_fit_transform(self, X, y=None, sample_weight=None):
        """Call ``fit_transform`` on ``self._gpu``.

        ``y`` and ``sample_weight`` are forwarded as keyword arguments and
        the result of the GPU estimator's call is returned unchanged.
        """
        # Fixes signature mismatch with cuml.KMeans. Can be removed after #6741.
        return self._gpu.fit_transform(X, y=y, sample_weight=sample_weight)

    def _init_centroids(self, *args, **kwargs):
        """Delegate to ``self._cpu._init_centroids`` with all arguments as given."""
        # Exposed for use by the sklearn test suite
        return self._cpu._init_centroids(*args, **kwargs)


class DBSCAN(ProxyMixin, cuml.cluster.DBSCAN):
Expand Down
5 changes: 5 additions & 0 deletions python/cuml/cuml/accel/estimator_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,11 @@ def __init__(self, *args, **kwargs):
# be pickled properly.
cls.__module__ = cls._gpu_class._cpu_class_path.rsplit(".", 1)[0]

# Forward _estimator_type as a class attribute if available
_estimator_type = getattr(cls._cpu_class, "_estimator_type", None)
if isinstance(_estimator_type, str):
cls._estimator_type = _estimator_type
Comment thread
csadorf marked this conversation as resolved.

# Add proxy method definitions for all public methods on CPU class
# that aren't already defined on the proxy class
methods = [
Expand Down
48 changes: 23 additions & 25 deletions python/cuml/cuml/accel/tests/scikit-learn/xfail-list.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -76,17 +76,10 @@
- "sklearn.semi_supervised.tests.test_self_training::test_classification[k_best-estimator0]"
- "sklearn.semi_supervised.tests.test_self_training::test_classification[threshold-estimator0]"
- "sklearn.semi_supervised.tests.test_self_training::test_zero_iterations[y1-estimator0]"
- "sklearn.tests.test_common::test_estimators[ElasticNet()-check_n_features_in_after_fitting]"
- "sklearn.tests.test_common::test_estimators[Lasso()-check_n_features_in_after_fitting]"
- "sklearn.tests.test_common::test_estimators[PCA()-check_n_features_in_after_fitting]"
- "sklearn.tests.test_common::test_estimators[TruncatedSVD()-check_n_features_in_after_fitting]"
- "sklearn.tests.test_common::test_estimators[LinearRegression()-check_n_features_in_after_fitting]"
- "sklearn.tests.test_common::test_estimators[LogisticRegression()-check_estimator_sparse_tag]"
- "sklearn.tests.test_common::test_estimators[LogisticRegression()-check_n_features_in_after_fitting]"
- "sklearn.tests.test_common::test_estimators[LogisticRegression()-check_non_transformer_estimators_n_iter]"
- "sklearn.tests.test_common::test_estimators[LogisticRegression()-check_sample_weight_equivalence_on_dense_data]"
- "sklearn.tests.test_common::test_estimators[LogisticRegression()-check_sample_weight_equivalence_on_sparse_data]"
- "sklearn.tests.test_common::test_estimators[Ridge()-check_n_features_in_after_fitting]"
- "sklearn.tests.test_common::test_estimators[Ridge()-check_non_transformer_estimators_n_iter]"
- "sklearn.utils.tests.test_validation::test_cross_val_predict[coo_matrix]"
- reason: Test should fail with cuml.accel (scikit-learn 1.7)
Expand Down Expand Up @@ -696,7 +689,6 @@
- "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_matrix-True-6-24-True-Lasso]"
- "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_lasso_not_as_toy_dataset[csc_array]"
- "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_lasso_not_as_toy_dataset[csc_matrix]"
- "sklearn.manifold.tests.test_spectral_embedding::test_pipeline_spectral_clustering"
- "sklearn.manifold.tests.test_t_sne::test_accessible_kl_divergence"
- "sklearn.manifold.tests.test_t_sne::test_bad_precomputed_distances[D0-.* square distance matrix-barnes_hut-asarray]"
- "sklearn.manifold.tests.test_t_sne::test_bad_precomputed_distances[D0-.* square distance matrix-barnes_hut-csr_array]"
Expand Down Expand Up @@ -1226,6 +1218,8 @@
- "sklearn.manifold.tests.test_t_sne::test_optimization_minimizes_kl_divergence"
- "sklearn.model_selection.tests.test_validation::test_cross_val_predict[coo_array]"
- "sklearn.model_selection.tests.test_validation::test_cross_val_predict[coo_matrix]"
- "sklearn.manifold.tests.test_t_sne::test_uniform_grid[barnes_hut]"
- "sklearn.manifold.tests.test_spectral_embedding::test_pipeline_spectral_clustering"
- reason: "cuml.accel bug: Missing components_ attribute"
tests:
- "sklearn.cluster.tests.test_dbscan::test_dbscan_no_core_samples[csr_array]"
Expand Down Expand Up @@ -1262,9 +1256,6 @@
- reason: "cuml.accel does not support callable initialization for KMeans"
marker: cuml_accel_kmeans_callable_init
tests:
- "sklearn.cluster.tests.test_k_means::test_all_init[KMeans-callable-dense]"
- "sklearn.cluster.tests.test_k_means::test_all_init[KMeans-callable-sparse_array]"
- "sklearn.cluster.tests.test_k_means::test_all_init[KMeans-callable-sparse_matrix]"
- "sklearn.cluster.tests.test_k_means::test_kmeans_init_auto_with_initial_centroids[KMeans-<lambda>-default]"
- reason: "cuml.accel fails for KMeans with AttributeError: 'KMeans' object has no attribute '_n_init'"
marker: cuml_accel_kmeans_n_init
Expand Down Expand Up @@ -1306,13 +1297,6 @@
- "sklearn.cluster.tests.test_k_means::test_dense_sparse[42-KMeans-X_csr0]"
- "sklearn.cluster.tests.test_k_means::test_dense_sparse[42-KMeans-X_csr1]"
- "sklearn.cluster.tests.test_spectral::test_precomputed_nearest_neighbors_filtering"
- reason: "cuml.accel fails on sparse matrix inputs for KMeans"
marker: cuml_accel_kmeans_sparse_unsupported
tests:
- "sklearn.cluster.tests.test_k_means::test_predict_dense_sparse[KMeans-k-means++-X_csr0]"
- "sklearn.cluster.tests.test_k_means::test_predict_dense_sparse[KMeans-k-means++-X_csr1]"
- "sklearn.cluster.tests.test_k_means::test_predict_dense_sparse[KMeans-random-X_csr0]"
- "sklearn.cluster.tests.test_k_means::test_predict_dense_sparse[KMeans-random-X_csr1]"
- reason: "Integer inputs are converted to float32 instead of float64 with cuml.accel"
marker: cuml_accel_kmeans_integer_dtype
tests:
Expand All @@ -1329,19 +1313,13 @@
tests:
- "sklearn.cluster.tests.test_k_means::test_wrong_params[param0-n_samples.* should be >= n_clusters-KMeans]"
- "sklearn.cluster.tests.test_k_means::test_wrong_params[param1-The shape of the initial centers .* does not match the number of clusters-KMeans]"
- "sklearn.cluster.tests.test_k_means::test_wrong_params[param2-The shape of the initial centers .* does not match the number of clusters-KMeans]"
- "sklearn.cluster.tests.test_k_means::test_wrong_params[param3-The shape of the initial centers .* does not match the number of features of the data-KMeans]"
- "sklearn.cluster.tests.test_k_means::test_wrong_params[param4-The shape of the initial centers .* does not match the number of features of the data-KMeans]"
- "sklearn.cluster.tests.test_k_means::test_relocating_with_duplicates[elkan-dense]"
- "sklearn.cluster.tests.test_k_means::test_relocating_with_duplicates[lloyd-dense]"
- reason: "cuml.accel deviates in KMeans empty cluster relocation (number of labels)"
marker: cuml_accel_kmeans_empty_clusters_num_labels
tests:
- "sklearn.cluster.tests.test_k_means::test_kmeans_empty_cluster_relocated[dense]"
- reason: "cuml.accel fails with AttributeError: 'memmap' object has no attribute 'ptr'."
marker: cuml-accel_kmeans_fails_with_attribute_error_memmap_ptr
tests:
- "sklearn.cluster.tests.test_k_means::test_predict_does_not_change_cluster_centers[None]"
- reason: "The signs of the principal components are swapped with cuml.accel (insignificant deviation)"
condition: "cuda-python >= 12.9"
tests:
Expand Down Expand Up @@ -1437,6 +1415,16 @@
- "sklearn.tests.test_common::test_estimators[TSNE()-check_estimators_empty_data_messages]"
- "sklearn.tests.test_common::test_estimators[TSNE()-check_estimators_nan_inf]"
- "sklearn.tests.test_common::test_estimators[TSNE()-check_estimator_sparse_matrix]"
- "sklearn.tests.test_common::test_estimators[KMeans()-check_sample_weights_not_an_array]"
- "sklearn.tests.test_common::test_estimators[KMeans()-check_sample_weights_shape]"
- "sklearn.tests.test_common::test_estimators[KMeans()-check_sample_weight_equivalence_on_dense_data]"
- "sklearn.tests.test_common::test_estimators[KMeans()-check_sample_weight_equivalence_on_sparse_data]"
- "sklearn.tests.test_common::test_estimators[KMeans()-check_complex_data]"
- "sklearn.tests.test_common::test_estimators[KMeans()-check_dtype_object]"
- "sklearn.tests.test_common::test_estimators[KMeans()-check_estimators_nan_inf]"
- "sklearn.tests.test_common::test_estimators[KMeans()-check_transformer_data_not_an_array]"
- "sklearn.tests.test_common::test_estimators[KMeans()-check_fit1d]"
- "sklearn.tests.test_common::test_estimators[KMeans()-check_fit2d_predict1d]"
- reason: "cuml doesn't set `feature_names_in_` properly"
marker: cuml_accel_feature_names_in
tests:
Expand All @@ -1448,16 +1436,26 @@
- "sklearn.tests.test_common::test_pandas_column_name_consistency[PCA()]"
- "sklearn.tests.test_common::test_pandas_column_name_consistency[TruncatedSVD()]"
- "sklearn.tests.test_common::test_pandas_column_name_consistency[TSNE()]"
- "sklearn.tests.test_common::test_pandas_column_name_consistency[KMeans()]"
- reason: "cuml raises a different error if X doesn't have expected n features"
marker: cuml_accel_check_n_features_in
tests:
- "sklearn.tests.test_common::test_check_n_features_in_after_fitting[ElasticNet()]"
- "sklearn.tests.test_common::test_check_n_features_in_after_fitting[KMeans()]"
- "sklearn.tests.test_common::test_check_n_features_in_after_fitting[Lasso()]"
- "sklearn.tests.test_common::test_check_n_features_in_after_fitting[LinearRegression()]"
- "sklearn.tests.test_common::test_check_n_features_in_after_fitting[LogisticRegression()]"
- "sklearn.tests.test_common::test_check_n_features_in_after_fitting[Ridge()]"
- "sklearn.tests.test_common::test_check_n_features_in_after_fitting[PCA()]"
- "sklearn.tests.test_common::test_check_n_features_in_after_fitting[Ridge()]"
- "sklearn.tests.test_common::test_check_n_features_in_after_fitting[TruncatedSVD()]"
- "sklearn.tests.test_common::test_estimators[ElasticNet()-check_n_features_in_after_fitting]"
- "sklearn.tests.test_common::test_estimators[KMeans()-check_n_features_in_after_fitting]"
- "sklearn.tests.test_common::test_estimators[Lasso()-check_n_features_in_after_fitting]"
- "sklearn.tests.test_common::test_estimators[LinearRegression()-check_n_features_in_after_fitting]"
- "sklearn.tests.test_common::test_estimators[LogisticRegression()-check_n_features_in_after_fitting]"
- "sklearn.tests.test_common::test_estimators[PCA()-check_n_features_in_after_fitting]"
- "sklearn.tests.test_common::test_estimators[Ridge()-check_n_features_in_after_fitting]"
- "sklearn.tests.test_common::test_estimators[TruncatedSVD()-check_n_features_in_after_fitting]"
- reason: "cuml missing certain fit attributes"
marker: cuml_accel_missing_fit_attributes
tests:
Expand Down
Loading