diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index 96adb871d2..140479f536 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -71,7 +71,6 @@ dependencies: - rmm==26.2.*,>=0.0.0a0 - scikit-build-core>=0.10.0 - scikit-learn>=1.4 -- scikit-learn>=1.4,<1.8.0 - scipy>=1.11.0 - seaborn - sphinx diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index 7839dc67b7..5f8e8903a6 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -71,7 +71,6 @@ dependencies: - rmm==26.2.*,>=0.0.0a0 - scikit-build-core>=0.10.0 - scikit-learn>=1.4 -- scikit-learn>=1.4,<1.8.0 - scipy>=1.11.0 - seaborn - sphinx diff --git a/conda/environments/all_cuda-130_arch-aarch64.yaml b/conda/environments/all_cuda-130_arch-aarch64.yaml index 494da2e884..449b5ec714 100644 --- a/conda/environments/all_cuda-130_arch-aarch64.yaml +++ b/conda/environments/all_cuda-130_arch-aarch64.yaml @@ -71,7 +71,6 @@ dependencies: - rmm==26.2.*,>=0.0.0a0 - scikit-build-core>=0.10.0 - scikit-learn>=1.4 -- scikit-learn>=1.4,<1.8.0 - scipy>=1.11.0 - seaborn - sphinx diff --git a/conda/environments/all_cuda-130_arch-x86_64.yaml b/conda/environments/all_cuda-130_arch-x86_64.yaml index 3e8908c335..6b2b6754d1 100644 --- a/conda/environments/all_cuda-130_arch-x86_64.yaml +++ b/conda/environments/all_cuda-130_arch-x86_64.yaml @@ -71,7 +71,6 @@ dependencies: - rmm==26.2.*,>=0.0.0a0 - scikit-build-core>=0.10.0 - scikit-learn>=1.4 -- scikit-learn>=1.4,<1.8.0 - scipy>=1.11.0 - seaborn - sphinx diff --git a/dependencies.yaml b/dependencies.yaml index f77919dd8d..dc71360a08 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -486,7 +486,7 @@ dependencies: - pytest-cov - pytest-xdist - seaborn - - scikit-learn>=1.4,<1.8.0 + - *scikit_learn - statsmodels - tenacity - 
umap-learn==0.5.7 diff --git a/python/cuml/cuml_accel_tests/integration/test_hdbscan.py b/python/cuml/cuml_accel_tests/integration/test_hdbscan.py index 68b80205a3..c7c01efd81 100644 --- a/python/cuml/cuml_accel_tests/integration/test_hdbscan.py +++ b/python/cuml/cuml_accel_tests/integration/test_hdbscan.py @@ -13,6 +13,9 @@ from sklearn.preprocessing import StandardScaler if Version(sklearn.__version__) >= Version("1.8.0.dev0"): + # NOTE: Remove this skip when issue + # https://github.com/scikit-learn-contrib/hdbscan/issues/689 is resolved, + # as it blocks compatibility with scikit-learn >= 1.8.0.dev0. pytest.skip( "hdbscan requires sklearn < 1.8.0.dev0", allow_module_level=True ) diff --git a/python/cuml/cuml_accel_tests/upstream/scikit-learn/xfail-list.yaml b/python/cuml/cuml_accel_tests/upstream/scikit-learn/xfail-list.yaml index 421305135c..1629121a09 100644 --- a/python/cuml/cuml_accel_tests/upstream/scikit-learn/xfail-list.yaml +++ b/python/cuml/cuml_accel_tests/upstream/scikit-learn/xfail-list.yaml @@ -1,3 +1,44 @@ +- reason: AUC standard deviation differs slightly with cuml.accel in sklearn 1.8 + marker: cuml_accel_bugs + condition: scikit-learn>=1.8 + tests: + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-decision_function-False-None]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-decision_function-False-curve_kwargs1]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-decision_function-True-None]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-decision_function-True-curve_kwargs1]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-predict_proba-False-None]" + - 
"sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-predict_proba-False-curve_kwargs1]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-predict_proba-True-None]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-predict_proba-True-curve_kwargs1]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-decision_function-False-None]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-decision_function-False-curve_kwargs1]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-decision_function-True-None]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-decision_function-True-curve_kwargs1]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-predict_proba-False-None]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-predict_proba-False-curve_kwargs1]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-predict_proba-True-None]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-predict_proba-True-curve_kwargs1]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_from_cv_results_legend_label[None-None]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_from_cv_results_legend_label[None-curve_kwargs1]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_from_cv_results_legend_label[single-None]" + - 
"sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_from_cv_results_legend_label[single-curve_kwargs1]" +- reason: Search CV sample weight equivalence differs with cuml.accel in sklearn 1.8 + marker: cuml_accel_bugs + condition: scikit-learn>=1.8 + tests: + - "sklearn.model_selection.tests.test_search::test_search_cv_sample_weight_equivalence[estimator0]" +- reason: Test should fail with cuml.accel + marker: cuml_accel_bugs + condition: scikit-learn<1.8 + tests: + - "sklearn.linear_model.tests.test_coordinate_descent::test_enet_coordinate_descent[Lasso-1-kwargs1]" + - "sklearn.linear_model.tests.test_coordinate_descent::test_warm_start_convergence" + - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-False-6-24-False-ElasticNet]" + - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-False-6-24-True-ElasticNet]" + - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_matrix-False-6-24-False-ElasticNet]" + - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_matrix-False-6-24-True-ElasticNet]" + - "sklearn.manifold.tests.test_t_sne::test_pca_initialization_not_compatible_with_sparse_input[csr_array]" + - "sklearn.manifold.tests.test_t_sne::test_pca_initialization_not_compatible_with_sparse_input[csr_matrix]" - reason: Test should fail with cuml.accel marker: cuml_accel_bugs tests: @@ -200,7 +241,6 @@ - "sklearn.linear_model.tests.test_common::test_balance_property[42-True-LogisticRegression]" - "sklearn.linear_model.tests.test_coordinate_descent::test_check_input_false" - "sklearn.linear_model.tests.test_coordinate_descent::test_elasticnet_precompute_gram_weighted_samples" - - "sklearn.linear_model.tests.test_coordinate_descent::test_enet_coordinate_descent[Lasso-1-kwargs1]" - "sklearn.linear_model.tests.test_coordinate_descent::test_enet_copy_X_False_check_input_False" - 
"sklearn.linear_model.tests.test_coordinate_descent::test_enet_float_precision" - "sklearn.linear_model.tests.test_coordinate_descent::test_enet_multitarget" @@ -215,7 +255,6 @@ - "sklearn.linear_model.tests.test_coordinate_descent::test_lasso_readonly_data" - "sklearn.linear_model.tests.test_coordinate_descent::test_lasso_toy" - "sklearn.linear_model.tests.test_coordinate_descent::test_lasso_zero" - - "sklearn.linear_model.tests.test_coordinate_descent::test_warm_start_convergence" - "sklearn.linear_model.tests.test_coordinate_descent::test_warm_start_convergence_with_regularizer_decrement" - "sklearn.linear_model.tests.test_ransac::test_perfect_horizontal_line" - "sklearn.linear_model.tests.test_ransac::test_ransac_exceed_max_skips" @@ -240,17 +279,13 @@ - "sklearn.linear_model.tests.test_sag::test_step_size_alpha_error" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_same_multiple_output_sparse_dense[coo_array]" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_same_multiple_output_sparse_dense[coo_matrix]" - - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-False-6-24-False-ElasticNet]" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-False-6-24-False-Lasso]" - - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-False-6-24-True-ElasticNet]" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-False-6-24-True-Lasso]" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-True-6-24-False-ElasticNet]" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-True-6-24-False-Lasso]" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-True-6-24-True-ElasticNet]" - 
"sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-True-6-24-True-Lasso]" - - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_matrix-False-6-24-False-ElasticNet]" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_matrix-False-6-24-False-Lasso]" - - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_matrix-False-6-24-True-ElasticNet]" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_matrix-False-6-24-True-Lasso]" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_matrix-True-6-24-False-ElasticNet]" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_matrix-True-6-24-False-Lasso]" @@ -262,8 +297,6 @@ - "sklearn.manifold.tests.test_t_sne::test_bh_match_exact" - "sklearn.manifold.tests.test_t_sne::test_binary_perplexity_stability" - "sklearn.manifold.tests.test_t_sne::test_n_iter_without_progress" - - "sklearn.manifold.tests.test_t_sne::test_pca_initialization_not_compatible_with_sparse_input[csr_array]" - - "sklearn.manifold.tests.test_t_sne::test_pca_initialization_not_compatible_with_sparse_input[csr_matrix]" - "sklearn.manifold.tests.test_t_sne::test_preserve_trustworthiness_approximately[pca-barnes_hut]" - "sklearn.manifold.tests.test_t_sne::test_preserve_trustworthiness_approximately[pca-exact]" - "sklearn.manifold.tests.test_t_sne::test_preserve_trustworthiness_approximately[random-barnes_hut]" @@ -451,11 +484,6 @@ - "sklearn.neighbors.tests.test_neighbors::test_neighbors_metrics[float64-minkowski]" - "sklearn.preprocessing.tests.test_data::test_standard_scaler_partial_fit_numerical_stability[csc_array]" - "sklearn.preprocessing.tests.test_data::test_standard_scaler_partial_fit_numerical_stability[csr_array]" - - 
"sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-multi-class-log-array]" - - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-multi-class-log-csr_array]" - - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-multi-class-log-csr_matrix]" - - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-two-classes-log-csr_array]" - - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-two-classes-log-csr_matrix]" - "sklearn.tests.test_calibration::test_calibrated_classifier_cv_double_sample_weights_equivalence[False-isotonic]" - "sklearn.tests.test_calibration::test_calibrated_classifier_cv_double_sample_weights_equivalence[False-sigmoid]" - "sklearn.tests.test_calibration::test_calibrated_classifier_cv_double_sample_weights_equivalence[True-isotonic]" @@ -781,11 +809,6 @@ - "sklearn.model_selection.tests.test_classification_threshold::test_fit_and_score_over_thresholds_sample_weight" - "sklearn.model_selection.tests.test_classification_threshold::test_tuned_threshold_classifier_cv_zeros_sample_weights_equivalence" - "sklearn.model_selection.tests.test_validation::test_cross_val_predict_class_subset" - - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-multi-class-log-array]" - - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-multi-class-log-csr_array]" - - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-multi-class-log-csr_matrix]" - - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-two-classes-log-csr_array]" - - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-two-classes-log-csr_matrix]" - "sklearn.tests.test_calibration::test_calibrated_classifier_cv_double_sample_weights_equivalence[False-isotonic]" - "sklearn.tests.test_calibration::test_calibrated_classifier_cv_double_sample_weights_equivalence[False-sigmoid]" - "sklearn.tests.test_calibration::test_calibrated_classifier_cv_double_sample_weights_equivalence[True-isotonic]" @@ -834,31 +857,21 @@ - 
"sklearn.tests.test_common::test_estimators[LogisticRegression()-check_sample_weight_equivalence_on_sparse_data]" - "sklearn.tests.test_common::test_estimators[Ridge()-check_non_transformer_estimators_n_iter]" - "sklearn.utils.tests.test_estimator_checks::test_check_estimator_pairwise" -- reason: Test should fail with cuml.accel (scikit-learn 1.7) +- reason: Test should fail with cuml.accel (scikit-learn <1.8) marker: cuml_accel_bugs - condition: scikit-learn == 1.7.* + condition: scikit-learn<1.8 tests: - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-decision_function-False-None]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-decision_function-False-curve_kwargs1]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-decision_function-True-None]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-decision_function-True-curve_kwargs1]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-predict_proba-False-None]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-predict_proba-False-curve_kwargs1]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-predict_proba-True-None]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-predict_proba-True-curve_kwargs1]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-decision_function-False-None]" - - 
"sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-decision_function-False-curve_kwargs1]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-decision_function-True-None]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-decision_function-True-curve_kwargs1]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-predict_proba-False-None]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-predict_proba-False-curve_kwargs1]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-predict_proba-True-None]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-predict_proba-True-curve_kwargs1]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_from_cv_results_legend_label[None-None]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_from_cv_results_legend_label[None-curve_kwargs1]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_from_cv_results_legend_label[single-None]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_from_cv_results_legend_label[single-curve_kwargs1]" - - "sklearn.model_selection.tests.test_search::test_search_cv_sample_weight_equivalence[estimator0]" + - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-multi-class-log-array]" + - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-multi-class-log-csr_array]" + - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-multi-class-log-csr_matrix]" + - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-two-classes-log-csr_array]" + - 
"sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-two-classes-log-csr_matrix]" +- reason: Test should fail with cuml.accel (scikit-learn<1.6) + marker: cuml_accel_bugs + condition: scikit-learn<1.6 + tests: + - "sklearn.tests.test_common::test_check_n_features_in_after_fitting[LogisticRegression()]" + - "sklearn.tests.test_common::test_pandas_column_name_consistency[HalvingGridSearchCV(cv=2,estimator=LogisticRegression(),min_resources='smallest',param_grid={'C':[0.1,1.0]},random_state=0)0]" - reason: Test should fail with cuml.accel (scikit-learn<1.7) marker: cuml_accel_bugs condition: scikit-learn<1.5 @@ -869,12 +882,6 @@ - "sklearn.decomposition.tests.test_pca::test_sparse_pca_solver_error[42-csr_array-auto]" - "sklearn.decomposition.tests.test_pca::test_sparse_pca_solver_error[42-csr_matrix-auto]" - "sklearn.manifold.tests.test_t_sne::test_n_iter_used" -- reason: Test should fail with cuml.accel (scikit-learn<1.7) - marker: cuml_accel_bugs - condition: scikit-learn<1.7 - tests: - - "sklearn.tests.test_common::test_check_n_features_in_after_fitting[LogisticRegression()]" - - "sklearn.tests.test_common::test_pandas_column_name_consistency[HalvingGridSearchCV(cv=2,estimator=LogisticRegression(),min_resources='smallest',param_grid={'C':[0.1,1.0]},random_state=0)0]" - reason: Test should fail with cuml.accel (scikit-learn<1.7) marker: cuml_accel_bugs condition: scikit-learn<1.7,>=1.5.0 @@ -906,6 +913,11 @@ strict: false tests: - "sklearn.tests.test_common::test_check_n_features_in_after_fitting[SpectralEmbedding()]" +- reason: cuml raises a different error if X doesn't have expected n features + marker: cuml_accel_check_n_features_in + condition: scikit-learn==1.7.* + tests: + - "sklearn.tests.test_common::test_estimators[SVC()-check_n_features_in_after_fitting]" - reason: cuml raises a different error if X doesn't have expected n features marker: cuml_accel_check_n_features_in condition: scikit-learn>=1.6 @@ -923,7 +935,6 @@ - 
"sklearn.tests.test_common::test_estimators[RandomForestClassifier()-check_n_features_in_after_fitting]" - "sklearn.tests.test_common::test_estimators[RandomForestRegressor()-check_n_features_in_after_fitting]" - "sklearn.tests.test_common::test_estimators[Ridge()-check_n_features_in_after_fitting]" - - "sklearn.tests.test_common::test_estimators[SVC()-check_n_features_in_after_fitting]" - "sklearn.tests.test_common::test_estimators[SVR()-check_n_features_in_after_fitting]" - "sklearn.tests.test_common::test_estimators[TruncatedSVD()-check_n_features_in_after_fitting]" - reason: cuml doesn't set `feature_names_in_` properly @@ -955,6 +966,15 @@ tests: - "sklearn.mixture.tests.test_gaussian_mixture::test_gaussian_mixture_precisions_init_diag" - "sklearn.utils.tests.test_estimator_html_repr::test_show_arrow_pipeline" +- reason: Test is flaky with cuml.accel + marker: cuml_accel_flaky + condition: scikit-learn<1.8 + strict: false + tests: + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_array-barnes_hut]" + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_array-exact]" + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_matrix-barnes_hut]" + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_matrix-exact]" - reason: Test is flaky with cuml.accel marker: cuml_accel_flaky condition: scikit-learn>=1.6 @@ -983,10 +1003,6 @@ - "sklearn.feature_selection.tests.test_sequential::test_unsupervised_model_fit[2]" - "sklearn.feature_selection.tests.test_sequential::test_unsupervised_model_fit[3]" - "sklearn.manifold.tests.test_spectral_embedding::test_pipeline_spectral_clustering" - - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_array-barnes_hut]" - - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_array-exact]" - - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_matrix-barnes_hut]" - - 
"sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_matrix-exact]" - "sklearn.manifold.tests.test_t_sne::test_optimization_minimizes_kl_divergence" - "sklearn.manifold.tests.test_t_sne::test_uniform_grid[barnes_hut]" - "sklearn.manifold.tests.test_t_sne::test_uniform_grid[exact]" @@ -1159,6 +1175,11 @@ - "sklearn.linear_model._glm.tests.test_glm::test_linalg_warning_with_newton_solver[42]" - "sklearn.linear_model.tests.test_logistic::test_logistic_regression_path_convergence_fail" - "sklearn.linear_model.tests.test_logistic::test_newton_cholesky_fallback_to_lbfgs[42]" +- reason: SVC input handling and validation + marker: cuml_accel_svc_estimator_checks + condition: scikit-learn<1.8 + tests: + - "sklearn.svm.tests.test_svm::test_svc_nonfinite_params" - reason: SVC input handling and validation marker: cuml_accel_svc_estimator_checks condition: scikit-learn>=1.6 @@ -1168,7 +1189,6 @@ - reason: SVC input handling and validation marker: cuml_accel_svc_estimator_checks tests: - - "sklearn.svm.tests.test_svm::test_svc_nonfinite_params" - "sklearn.tests.test_common::test_estimators[SVC()-check_classifier_data_not_an_array]" - "sklearn.tests.test_common::test_estimators[SVC()-check_complex_data]" - "sklearn.tests.test_common::test_estimators[SVC()-check_estimators_nan_inf]" @@ -1349,10 +1369,47 @@ - "sklearn.tests.test_common::test_estimators[TSNE()-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_estimators[TSNE()-check_methods_sample_order_invariance]" - "sklearn.tests.test_common::test_estimators[TSNE()-check_methods_subset_invariance]" +- reason: Calibration temperature scaling differs with cuml.accel in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.tests.test_calibration::test_calibrated_classifier_cv_double_sample_weights_equivalence[False-temperature]" + - "sklearn.tests.test_calibration::test_calibrated_classifier_cv_double_sample_weights_equivalence[True-temperature]" +- reason: Elasticnet scores attribute layout 
differs with cuml.accel in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_scores_attribute_layout_elasticnet" - reason: Flaky deviations in n_iter_ values in cuml.accel strict: false tests: - "sklearn.cluster.tests.test_k_means::test_kmeans_elkan_results[42-1e-100-dense-blobs]" +- reason: Linear SVM sample weight handling differs with cuml.accel in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[42-LinearSVC-params0]" + - "sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[42-LinearSVC-params1]" + - "sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[42-LinearSVC-params2]" + - "sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[42-LinearSVC-params3]" + - "sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[42-LinearSVR-params4]" + - "sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[42-LinearSVR-params5]" + - "sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[42-LinearSVR-params6]" +- reason: LinearSVC parameter validation differs with cuml.accel in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.svm.tests.test_svm::test_linearsvc_parameters[42-False-l1-hinge]" + - "sklearn.svm.tests.test_svm::test_linearsvc_parameters[42-False-l2-hinge]" + - "sklearn.svm.tests.test_svm::test_linearsvc_parameters[42-True-l1-hinge]" + - "sklearn.svm.tests.test_svm::test_linearsvc_parameters[42-True-l1-squared_hinge]" +- reason: LinearSVM + condition: scikit-learn<1.8 + tests: + - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-multi-class-squared_hinge-array]" + - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-two-classes-squared_hinge-array]" + - "sklearn.svm.tests.test_svm::test_liblinear_set_coef" + - "sklearn.svm.tests.test_svm::test_linear_svm_convergence_warnings" + - 
"sklearn.svm.tests.test_svm::test_linearsvc_parameters[False-l1-hinge]" + - "sklearn.svm.tests.test_svm::test_linearsvc_parameters[False-l2-hinge]" + - "sklearn.svm.tests.test_svm::test_linearsvc_parameters[True-l1-hinge]" + - "sklearn.svm.tests.test_svm::test_linearsvc_parameters[True-l1-squared_hinge]" - reason: LinearSVM condition: scikit-learn<=1.6 tests: @@ -1377,20 +1434,12 @@ - "sklearn.feature_selection.tests.test_rfe::test_rfe_wrapped_estimator[RFECV-4-importance_getter0]" - "sklearn.feature_selection.tests.test_rfe::test_rfe_wrapped_estimator[RFECV-4-regressor_.coef_]" - "sklearn.model_selection.tests.test_search::test_grid_search_no_score" - - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-multi-class-squared_hinge-array]" - - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-two-classes-squared_hinge-array]" - "sklearn.svm.tests.test_sparse::test_linearsvc[lil_array-dok_array]" - "sklearn.svm.tests.test_sparse::test_linearsvc[lil_matrix-dok_matrix]" - "sklearn.svm.tests.test_sparse::test_linearsvc_iris[csr_array]" - "sklearn.svm.tests.test_sparse::test_linearsvc_iris[csr_matrix]" - "sklearn.svm.tests.test_sparse::test_sparse_liblinear_intercept_handling" - "sklearn.svm.tests.test_svm::test_dense_liblinear_intercept_handling" - - "sklearn.svm.tests.test_svm::test_liblinear_set_coef" - - "sklearn.svm.tests.test_svm::test_linear_svm_convergence_warnings" - - "sklearn.svm.tests.test_svm::test_linearsvc_parameters[False-l1-hinge]" - - "sklearn.svm.tests.test_svm::test_linearsvc_parameters[False-l2-hinge]" - - "sklearn.svm.tests.test_svm::test_linearsvc_parameters[True-l1-hinge]" - - "sklearn.svm.tests.test_svm::test_linearsvc_parameters[True-l1-squared_hinge]" - "sklearn.tests.test_calibration::test_calibration_default_estimator" - "sklearn.tests.test_calibration::test_calibration_inconsistent_prefit_n_features_in" - "sklearn.tests.test_calibration::test_calibration_multiclass[1-False-isotonic]" @@ -1420,10 +1469,12 @@ - 
"sklearn.tests.test_common::test_pandas_column_name_consistency[LinearSVC()]" - "sklearn.tests.test_common::test_pandas_column_name_consistency[LinearSVR()]" - reason: LinearSVM test expects exact results on small data, which isn't guaranteed + condition: scikit-learn<1.8 tests: - "sklearn.svm.tests.test_svm::test_bad_input[lil_array]" - "sklearn.svm.tests.test_svm::test_bad_input[lil_matrix]" - reason: LinearSVM tests too strict of tolerances due to differences in solvers + condition: scikit-learn<1.8 tests: - "sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[LinearSVC-params0]" - "sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[LinearSVC-params1]" @@ -1432,6 +1483,58 @@ - "sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[LinearSVR-params4]" - "sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[LinearSVR-params5]" - "sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[LinearSVR-params6]" +- reason: LogisticRegression liblinear sample weight handling differs with cuml.accel in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_logisticregression_liblinear_sample_weight[42-params0]" + - "sklearn.linear_model.tests.test_logistic::test_logisticregression_liblinear_sample_weight[42-params1]" + - "sklearn.linear_model.tests.test_logistic::test_logisticregression_liblinear_sample_weight[42-params2]" +- reason: LogisticRegression missing _predict_proba_lr attribute with cuml.accel in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_logreg_predict_proba_multinomial[42]" +- reason: LogisticRegression multiclass solvers differ with cuml.accel in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_logistic_regression_solvers_multiclass[True]" +- reason: LogisticRegression sample weights handling differs with cuml.accel in sklearn 1.8 
+ condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_logistic_regression_sample_weights[42-lbfgs-single]" + - "sklearn.linear_model.tests.test_logistic::test_logistic_regression_sample_weights[42-liblinear-single]" + - "sklearn.linear_model.tests.test_logistic::test_logistic_regression_sample_weights[42-newton-cg-single]" + - "sklearn.linear_model.tests.test_logistic::test_logistic_regression_sample_weights[42-newton-cholesky-single]" + - "sklearn.linear_model.tests.test_logistic::test_logistic_regression_sample_weights[42-sag-single]" + - "sklearn.linear_model.tests.test_logistic::test_logistic_regression_sample_weights[42-saga-single]" +- reason: LogisticRegressionCV fold coefficients differ with cuml.accel in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_LogisticRegressionCV_on_folds" +- reason: Multinomial logistic regression class weight handling differs with cuml.accel in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_multinomial_logistic_regression_string_inputs" +- reason: 'Numerical precision difference: cuML uses float32, test expects float64 precision' + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_logistic_cv[42-False]" + - "sklearn.linear_model.tests.test_logistic::test_logistic_cv[42-True]" + - "sklearn.linear_model.tests.test_logistic::test_logistic_cv_multinomial_score[42-neg_log_loss-multiclass_agg_list3]" + - "sklearn.linear_model.tests.test_logistic::test_logistic_glmnet[lbfgs]" + - "sklearn.linear_model.tests.test_logistic::test_logistic_glmnet[newton-cholesky]" +- reason: Numerical tolerance issue with Lasso sparse/dense equality in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-False-24-6-False-Lasso]" + - 
"sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-False-24-6-True-Lasso]" + - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_matrix-False-24-6-False-Lasso]" + - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_matrix-False-24-6-True-Lasso]" +- reason: Numerical tolerance issue with t-SNE sparse input in sklearn 1.8 (flaky) + condition: scikit-learn>=1.8 + strict: false + tests: + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_array-random-exact]" + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_matrix-random-exact]" - reason: Ridge doesn't implement n_iter yet tests: - "sklearn.linear_model.tests.test_ridge::test_n_iter" @@ -1473,3 +1576,122 @@ - reason: The sklearn test has the error message accidentally flipped, our message is correct tests: - "sklearn.linear_model.tests.test_ridge::test_ridge_individual_penalties" +- reason: cuML TSNE barnes_hut produces poor quality embeddings with sparse input + condition: scikit-learn>=1.8 + tests: + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_array-random-barnes_hut]" + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_matrix-random-barnes_hut]" +- reason: cuML TSNE does not support sparse input with PCA initialization + condition: scikit-learn>=1.8 + tests: + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_array-pca-barnes_hut]" + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_array-pca-exact]" + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_matrix-pca-barnes_hut]" + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_matrix-pca-exact]" +- reason: cuML does not emit ConvergenceWarning in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - 
"sklearn.linear_model.tests.test_logistic::test_max_iter[42-lbfgs-lbfgs failed to converge-max_iter0]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-lbfgs-lbfgs failed to converge-max_iter1]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-lbfgs-lbfgs failed to converge-max_iter2]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-lbfgs-lbfgs failed to converge-max_iter3]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-liblinear-Liblinear failed to converge, increase the number of iterations.-max_iter0]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-liblinear-Liblinear failed to converge, increase the number of iterations.-max_iter1]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-liblinear-Liblinear failed to converge, increase the number of iterations.-max_iter2]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-liblinear-Liblinear failed to converge, increase the number of iterations.-max_iter3]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-newton-cg-newton-cg failed to converge.* Increase the number of iterations.-max_iter0]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-newton-cg-newton-cg failed to converge.* Increase the number of iterations.-max_iter1]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-newton-cg-newton-cg failed to converge.* Increase the number of iterations.-max_iter2]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-newton-cg-newton-cg failed to converge.* Increase the number of iterations.-max_iter3]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-newton-cholesky-Newton solver did not converge after [0-9]* iterations-max_iter0]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-sag-The max_iter was reached which means the coef_ did not converge-max_iter0]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-sag-The 
max_iter was reached which means the coef_ did not converge-max_iter1]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-sag-The max_iter was reached which means the coef_ did not converge-max_iter2]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-sag-The max_iter was reached which means the coef_ did not converge-max_iter3]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-saga-The max_iter was reached which means the coef_ did not converge-max_iter0]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-saga-The max_iter was reached which means the coef_ did not converge-max_iter1]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-saga-The max_iter was reached which means the coef_ did not converge-max_iter2]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-saga-The max_iter was reached which means the coef_ did not converge-max_iter3]" + - "sklearn.svm.tests.test_svm::test_linear_svm_convergence_warnings[42]" +- reason: cuML doesn't enforce sparse matrix int64 indices restriction + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_large_sparse_matrix[42-csr_array-liblinear]" + - "sklearn.linear_model.tests.test_logistic::test_large_sparse_matrix[42-csr_array-sag]" + - "sklearn.linear_model.tests.test_logistic::test_large_sparse_matrix[42-csr_array-saga]" + - "sklearn.linear_model.tests.test_logistic::test_large_sparse_matrix[42-csr_matrix-liblinear]" + - "sklearn.linear_model.tests.test_logistic::test_large_sparse_matrix[42-csr_matrix-sag]" + - "sklearn.linear_model.tests.test_logistic::test_large_sparse_matrix[42-csr_matrix-saga]" +- reason: cuML doesn't support warm_start with newton solvers + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_warm_start_newton_solver[42-1-False-newton-cg]" + - "sklearn.linear_model.tests.test_logistic::test_warm_start_newton_solver[42-1-False-newton-cholesky]" 
+ - "sklearn.linear_model.tests.test_logistic::test_warm_start_newton_solver[42-1-True-newton-cg]" + - "sklearn.linear_model.tests.test_logistic::test_warm_start_newton_solver[42-1-True-newton-cholesky]" + - "sklearn.linear_model.tests.test_logistic::test_warm_start_newton_solver[42-inf-False-newton-cg]" + - "sklearn.linear_model.tests.test_logistic::test_warm_start_newton_solver[42-inf-False-newton-cholesky]" + - "sklearn.linear_model.tests.test_logistic::test_warm_start_newton_solver[42-inf-True-newton-cg]" + - "sklearn.linear_model.tests.test_logistic::test_warm_start_newton_solver[42-inf-True-newton-cholesky]" +- reason: cuML doesn't validate NaN input in the same way as sklearn + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_nan" +- reason: cuML proxy doesn't replicate sklearn warnings in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_l1_ratio_None_deprecated" + - "sklearn.linear_model.tests.test_logistic::test_l1_ratio_non_elasticnet" + - "sklearn.linear_model.tests.test_logistic::test_logisticregression_warns_with_n_jobs" + - "sklearn.linear_model.tests.test_logistic::test_lr_penalty_l1ratio_incompatible[l1-0.0]" + - "sklearn.linear_model.tests.test_logistic::test_lr_penalty_l1ratio_incompatible[l2-1.0]" + - "sklearn.linear_model.tests.test_logistic::test_penalty_deprecated[LogisticRegression]" + - "sklearn.linear_model.tests.test_logistic::test_penalty_none[42-lbfgs]" + - "sklearn.linear_model.tests.test_logistic::test_penalty_none[42-newton-cg]" + - "sklearn.linear_model.tests.test_logistic::test_penalty_none[42-newton-cholesky]" + - "sklearn.linear_model.tests.test_logistic::test_penalty_none[42-sag]" + - "sklearn.linear_model.tests.test_logistic::test_penalty_none[42-saga]" +- reason: cuML proxy doesn't support direct writes to coef_/intercept_ attributes + condition: scikit-learn>=1.8 + tests: + - 
"sklearn.linear_model.tests.test_logistic::test_write_parameters" +- reason: cuML uses different solver backend, doesn't enforce liblinear restrictions + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_check_solver_option[LogisticRegression]" + - "sklearn.linear_model.tests.test_logistic::test_liblinear_multiclass_raises[LogisticRegression]" +- reason: cuML uses float32 for liblinear solver, sklearn expects float64 + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_dtype_match[csr_array-False-liblinear]" + - "sklearn.linear_model.tests.test_logistic::test_dtype_match[csr_array-True-liblinear]" + - "sklearn.linear_model.tests.test_logistic::test_dtype_match[csr_matrix-False-liblinear]" + - "sklearn.linear_model.tests.test_logistic::test_dtype_match[csr_matrix-True-liblinear]" +- reason: cuPy sparse matrices don't support int64 dtype in decision_function output + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_predict_2_classes[csr_array]" + - "sklearn.linear_model.tests.test_logistic::test_predict_2_classes[csr_matrix]" + - "sklearn.linear_model.tests.test_logistic::test_predict_3_classes[csr_array]" + - "sklearn.linear_model.tests.test_logistic::test_predict_3_classes[csr_matrix]" +- reason: cuml.accel handles sparse input differently in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.svm.tests.test_svm::test_bad_input[42-lil_array]" + - "sklearn.svm.tests.test_svm::test_bad_input[42-lil_matrix]" +- reason: cuml.accel raises RuntimeError instead of ValueError for non-finite params in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.svm.tests.test_svm::test_svc_nonfinite_params[42]" +- reason: l1_min_c calculation differs with cuml.accel in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-log-csr_array]" + - 
"sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-log-csr_matrix]" + - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-squared_hinge-array]" +- reason: liblinear solver behavior differs with cuml.accel in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_liblinear_dual_random_state[42]" + - "sklearn.linear_model.tests.test_logistic::test_liblinear_with_large_values" + - "sklearn.svm.tests.test_svm::test_liblinear_set_coef[42]" diff --git a/python/cuml/cuml_accel_tests/upstream/umap/run-tests.sh b/python/cuml/cuml_accel_tests/upstream/umap/run-tests.sh index f2bc065042..3e0249b6bf 100755 --- a/python/cuml/cuml_accel_tests/upstream/umap/run-tests.sh +++ b/python/cuml/cuml_accel_tests/upstream/umap/run-tests.sh @@ -14,6 +14,21 @@ set -eu UMAP_TAG="release-0.5.7" +# Skip tests for scikit-learn >= 1.8 -- umap-learn is not compatible with scikit-learn 1.8 yet +python -c " +import sys +from packaging.version import Version +import sklearn +sys.exit( + int( + Version(sklearn.__version__) >= Version('1.8') + ) +) +" || { + echo "Skipping umap tests for scikit-learn >= 1.8" + exit 0 +} + THIS_DIRECTORY=$( cd "$(dirname "${BASH_SOURCE[0]}")" ; pwd -P ) UMAP_REPO="${THIS_DIRECTORY}/umap-upstream" diff --git a/python/cuml/pyproject.toml b/python/cuml/pyproject.toml index 69e930c553..77569d1b6a 100644 --- a/python/cuml/pyproject.toml +++ b/python/cuml/pyproject.toml @@ -120,7 +120,7 @@ test = [ "pytest-xdist", "pytest<9.0", "pyyaml", - "scikit-learn>=1.4,<1.8.0", + "scikit-learn>=1.4", "seaborn", "statsmodels", "tenacity", diff --git a/python/cuml/tests/test_fil.py b/python/cuml/tests/test_fil.py index 31997b52df..63c991702b 100644 --- a/python/cuml/tests/test_fil.py +++ b/python/cuml/tests/test_fil.py @@ -10,7 +10,9 @@ import numpy as np import pandas as pd import pytest +import sklearn import treelite +from packaging.version import Version # Import XGBoost before scikit-learn to work around a libgomp bug 
# See https://github.com/dmlc/xgboost/issues/7110 @@ -910,6 +912,9 @@ def test_device_selection(device_id, model_kind, tmp_path): ) xgb_model.fit(X, y) model_path = os.path.join(tmp_path, "xgb_class.ubj") + # Skip on scikit-learn >= 1.8.0.dev0, which xgboost is not compatible with + if Version(sklearn.__version__) >= Version("1.8.0.dev0"): + pytest.skip("xgboost is incompatible with sklearn >= 1.8.0.dev0") xgb_model.save_model(model_path) fm = ForestInference.load( model_path, diff --git a/python/cuml/tests/test_hdbscan.py b/python/cuml/tests/test_hdbscan.py index c6d876c8ac..ac3bfb8821 100644 --- a/python/cuml/tests/test_hdbscan.py +++ b/python/cuml/tests/test_hdbscan.py @@ -6,6 +6,8 @@ import numpy as np import pandas as pd import pytest +import sklearn +from packaging.version import Version from pylibraft.common import DeviceResourcesSNMG from sklearn import datasets from sklearn.datasets import make_blobs @@ -23,6 +25,11 @@ from cuml.testing.datasets import make_pattern from cuml.testing.utils import array_equal +if Version(sklearn.__version__) >= Version("1.8.0.dev0"): + pytest.skip( + "hdbscan requires sklearn < 1.8.0.dev0", allow_module_level=True + ) + dataset_names = ["noisy_circles", "noisy_moons", "varied"] diff --git a/python/cuml/tests/test_linear_model.py b/python/cuml/tests/test_linear_model.py index e4e243dedf..062d546675 100644 --- a/python/cuml/tests/test_linear_model.py +++ b/python/cuml/tests/test_linear_model.py @@ -329,7 +329,6 @@ def test_logistic_regression( solver="saga", C=C, fit_intercept=fit_intercept, - multi_class="auto", ) else: sklog = skLog( @@ -337,7 +336,6 @@ def test_logistic_regression( solver="saga", C=C, fit_intercept=fit_intercept, - multi_class="auto", ) else: sklog = skLog( @@ -345,7 +343,6 @@ def test_logistic_regression( solver="lbfgs", C=C, fit_intercept=fit_intercept, - multi_class="auto", ) sklog.fit(X_train, y_train) @@ -417,7 +414,7 @@ def test_logistic_regression_model_default(dtype): y_test = y_test.astype(dtype) culog = cuLog() culog.fit(X_train, 
y_train) - sklog = skLog(multi_class="auto") + sklog = skLog() sklog.fit(X_train, y_train) @@ -586,11 +583,7 @@ def test_logistic_regression_predict_proba( sklog = skLog( fit_intercept=fit_intercept, - **( - {"solver": "lbfgs", "multi_class": "multinomial"} - if num_classes > 2 - else {} - ), + **({"solver": "lbfgs"} if num_classes > 2 else {}), ) sklog.coef_ = culog.coef_ sklog.intercept_ = culog.intercept_ if fit_intercept else 0 diff --git a/python/cuml/tests/test_trustworthiness.py b/python/cuml/tests/test_trustworthiness.py index e684f26442..668a9375df 100644 --- a/python/cuml/tests/test_trustworthiness.py +++ b/python/cuml/tests/test_trustworthiness.py @@ -4,6 +4,8 @@ import cudf import numpy as np import pytest +import sklearn +from packaging.version import Version from sklearn.datasets import make_blobs from sklearn.manifold import trustworthiness as sklearn_trustworthiness from umap import UMAP @@ -19,6 +21,10 @@ @pytest.mark.filterwarnings( "ignore:n_jobs value.*overridden.*by setting random_state.*:UserWarning" ) +@pytest.mark.xfail( + condition=Version(sklearn.__version__) >= Version("1.8.0.dev0"), + reason="umap-learn is incompatible with sklearn >= 1.8.0", +) def test_trustworthiness( input_type, n_samples, n_features, n_components, batch_size ): diff --git a/python/cuml/tests/test_umap.py b/python/cuml/tests/test_umap.py index a66c48dc34..4e80387de6 100644 --- a/python/cuml/tests/test_umap.py +++ b/python/cuml/tests/test_umap.py @@ -9,7 +9,9 @@ import numpy as np import pytest import scipy.sparse as scipy_sparse +import sklearn import umap +from packaging.version import Version from pylibraft.common import DeviceResourcesSNMG from sklearn import datasets from sklearn.cluster import KMeans @@ -29,6 +31,9 @@ unit_param, ) +if Version(sklearn.__version__) >= Version("1.8.0.dev0"): + pytest.skip("umap requires sklearn < 1.8.0.dev0", allow_module_level=True) + dataset_names = ["iris", "digits", "wine", "blobs"]