diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d741d7df9e..1468513d2c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -55,6 +55,7 @@ repos: entry: '(category=|\s)DeprecationWarning[,)]' language: pygrep types_or: [python, cython] + exclude: (^|/)tests?/ - id: cmake-format name: cmake-format entry: ./cpp/scripts/run-cmake-format.sh cmake-format diff --git a/python/cuml/cuml_accel_tests/integration/test_hdbscan_core.py b/python/cuml/cuml_accel_tests/integration/test_hdbscan_core.py index 87537a7df1..e915012b41 100644 --- a/python/cuml/cuml_accel_tests/integration/test_hdbscan_core.py +++ b/python/cuml/cuml_accel_tests/integration/test_hdbscan_core.py @@ -124,6 +124,9 @@ def test_hdbscan_gen_min_span_tree(synthetic_data): ), "Minimum spanning tree should be generated when gen_min_span_tree=True" +@pytest.mark.filterwarnings( + "ignore:Instantiating a backend using a LocalPath:UserWarning" +) def test_hdbscan_memory(synthetic_data, tmpdir): X, _ = synthetic_data from joblib import Memory diff --git a/python/cuml/cuml_accel_tests/integration/test_umap.py b/python/cuml/cuml_accel_tests/integration/test_umap.py index ca7244028e..b18a082e25 100644 --- a/python/cuml/cuml_accel_tests/integration/test_umap.py +++ b/python/cuml/cuml_accel_tests/integration/test_umap.py @@ -66,6 +66,15 @@ def test_umap_min_dist(manifold_data, min_dist): "haversine", ], ) +@pytest.mark.filterwarnings( + "ignore:gradient function is not yet implemented:UserWarning" +) +@pytest.mark.filterwarnings( + "ignore:n_jobs value .* overridden to .* by setting random_state:UserWarning" +) +@pytest.mark.filterwarnings( + "ignore:overflow encountered in cast:RuntimeWarning" +) def test_umap_metric(manifold_data, metric): X = manifold_data # haversine only works for 2D data @@ -177,6 +186,9 @@ def test_umap_angular_rp_forest(manifold_data, angular_rp_forest): @pytest.mark.parametrize("densmap", [True, False]) +@pytest.mark.filterwarnings( + "ignore:n_jobs value .* overridden to .* by setting random_state:UserWarning" +) def test_umap_densmap(manifold_data, densmap): X = manifold_data umap = UMAP(densmap=densmap, random_state=42) diff --git a/python/cuml/cuml_accel_tests/test_magics.py b/python/cuml/cuml_accel_tests/test_magics.py index 071f62caa0..90949b3400 100644 --- a/python/cuml/cuml_accel_tests/test_magics.py +++ b/python/cuml/cuml_accel_tests/test_magics.py @@ -22,6 +22,8 @@ SCRIPT_HEADER = """ +import warnings +warnings.filterwarnings('ignore', category=UserWarning) from IPython.core.interactiveshell import InteractiveShell from traitlets.config import Config c = Config() diff --git a/python/cuml/pyproject.toml b/python/cuml/pyproject.toml index 467f3f44c6..24b7c95bbd 100644 --- a/python/cuml/pyproject.toml +++ b/python/cuml/pyproject.toml @@ -54,30 +54,29 @@ filterwarnings = [ "error:::cudf", "ignore:[^.]*ABCs[^.]*:DeprecationWarning:patsy[.*]", "ignore:(.*)alias(.*):DeprecationWarning:hdbscan[.*]", - # https://github.com/rapidsai/build-planning/issues/116 - "ignore:.*cuda..* module is deprecated.*:DeprecationWarning", - # TODO: https://github.com/rapidsai/cuml/issues/5878 - "ignore:.*ndarray.scatter_[(max|add)].* is deprecated:DeprecationWarning:cupyx", # TODO: https://github.com/rapidsai/cuml/issues/5879 "ignore::FutureWarning:sklearn", "ignore::DeprecationWarning:sklearn", - # https://github.com/pytest-dev/pytest-cov/issues/557 - "ignore:The --rsyncdir command line argument:DeprecationWarning", - # https://github.com/scikit-learn/scikit-learn/pull/25157 - "ignore:.* is deprecated. Use files:DeprecationWarning", - # https://github.com/scikit-learn/scikit-learn/pull/25741 - "ignore:`product` is deprecated as of NumPy 1.25.0:DeprecationWarning", - # https://github.com/scikit-learn-contrib/hdbscan/pull/612 - "ignore:`alltrue` is deprecated as of NumPy 1.25.0:DeprecationWarning", - # https://github.com/scikit-learn/scikit-learn/pull/26287 - "ignore:is_sparse is deprecated and will be removed:DeprecationWarning", + # From sklearn + "ignore:A column-vector y was passed:sklearn.exceptions.DataConversionWarning", # From dask-glm "ignore:pkg_resources is deprecated as an API:DeprecationWarning", "ignore:Deprecated call to `pkg_resources.declare_namespace:DeprecationWarning", "ignore:`rcond` parameter will change to the default:FutureWarning", "ignore:Dask configuration key 'fuse_ave_width':FutureWarning", + # From dask/distributed + "ignore:Dask annotations .* detected:UserWarning", + "ignore:Sending large graph of size:UserWarning", # From hdbscan "ignore:Conversion of an array with ndim > 0:DeprecationWarning", + # From umap-learn + "ignore:Tensorflow not installed.*:ImportWarning:umap", + # From sklearn estimator checks + "ignore:Estimator .* does not inherit from.*:UserWarning", + # From numba - GPU under-utilization warnings in tests with small data + "ignore::numba.core.errors.NumbaPerformanceWarning", + # Allow informational benchmark warnings (pytest-benchmark plugin) + "default::pytest_benchmark.logger.PytestBenchmarkWarning", ] [project] diff --git a/python/cuml/tests/dask/test_dask_logistic_regression.py b/python/cuml/tests/dask/test_dask_logistic_regression.py index 9cea222fc6..6752915b44 100644 --- a/python/cuml/tests/dask/test_dask_logistic_regression.py +++ b/python/cuml/tests/dask/test_dask_logistic_regression.py @@ -552,6 +552,9 @@ def test_sparse_from_dense(reg_dtype, client): @pytest.mark.parametrize("dtype", [np.float32, np.float64]) +@pytest.mark.filterwarnings( + "ignore:The max_iter was reached which means the coef_ did not converge:sklearn.exceptions.ConvergenceWarning" +) def test_sparse_nlp20news(dtype, nlp_20news, client): X, y = nlp_20news @@ -1094,6 +1097,9 @@ def test_standardization_sparse_with_shift_scale( @pytest.mark.parametrize("standardization", [False, True]) @pytest.mark.parametrize("fit_intercept", [False, True]) +@pytest.mark.filterwarnings( + "ignore:invalid value encountered in divide:RuntimeWarning:sklearn" +) def test_sparse_all_zeroes( standardization, fit_intercept, client, X=None, y=None, n_parts=2 ): diff --git a/python/cuml/tests/explainer/test_explainer_kernel_shap.py b/python/cuml/tests/explainer/test_explainer_kernel_shap.py index 97bd40d1bc..2e57e1209b 100644 --- a/python/cuml/tests/explainer/test_explainer_kernel_shap.py +++ b/python/cuml/tests/explainer/test_explainer_kernel_shap.py @@ -342,6 +342,11 @@ def test_l1_regularization(exact_shap_regression_dataset, l1_type): assert isinstance(nz, cp.ndarray) +@pytest.mark.filterwarnings("ignore::sklearn.exceptions.ConvergenceWarning") +@pytest.mark.filterwarnings("ignore:Changing solver.*:UserWarning") +@pytest.mark.filterwarnings( + "ignore:overflow encountered in divide:RuntimeWarning" +) def test_typeerror_input(): X, y = make_regression(n_samples=100, n_features=10, random_state=10) clf = Lasso() diff --git a/python/cuml/tests/test_common.py b/python/cuml/tests/test_common.py index 201ada3afe..7430ec3a4e 100644 --- a/python/cuml/tests/test_common.py +++ b/python/cuml/tests/test_common.py @@ -30,6 +30,7 @@ cuml.UMAP, ], ) +@pytest.mark.filterwarnings("ignore:The number of bins.*:UserWarning") def test_random_state_argument(Estimator): X, y = make_blobs(random_state=0) # Check that both integer and np.random.RandomState are accepted diff --git a/python/cuml/tests/test_doctest.py b/python/cuml/tests/test_doctest.py index f35e3d075a..b3eb999580 100644 --- a/python/cuml/tests/test_doctest.py +++ b/python/cuml/tests/test_doctest.py @@ -17,6 +17,7 @@ import doctest import inspect import io +import warnings import cudf import numpy as np @@ -112,8 +113,13 @@ def test_docstring(docstring): # Capture stdout and include failing outputs in the traceback. doctest_stdout = io.StringIO() with contextlib.redirect_stdout(doctest_stdout): - runner.run(docstring) - results = runner.summarize() + # Suppress expected warnings during doctest execution + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=UserWarning) + warnings.filterwarnings("ignore", category=FutureWarning) + warnings.filterwarnings("ignore", category=DeprecationWarning) + runner.run(docstring) + results = runner.summarize() try: assert not results.failed, ( f"{results.failed} of {results.attempted} doctests failed for " diff --git a/python/cuml/tests/test_linear_model.py b/python/cuml/tests/test_linear_model.py index 923c7d5c65..f6fe6a91dc 100644 --- a/python/cuml/tests/test_linear_model.py +++ b/python/cuml/tests/test_linear_model.py @@ -1129,6 +1129,7 @@ def test_elasticnet_solvers_eq(datatype, alpha, l1_ratio, nrows, column_info): assert np.corrcoef(cd.coef_, qn.coef_)[0, 1] > 0.98 +@pytest.mark.filterwarnings("ignore:Changing solver.*:UserWarning") @given( algorithm=st.sampled_from(ALGORITHMS), xp=st.sampled_from([np, cp]), diff --git a/python/cuml/tests/test_metrics.py b/python/cuml/tests/test_metrics.py index dc511a5575..cd1c819e65 100644 --- a/python/cuml/tests/test_metrics.py +++ b/python/cuml/tests/test_metrics.py @@ -835,6 +835,9 @@ def test_confusion_matrix_binary(): cp.testing.assert_array_equal(ref, cp.array([tn, fp, fn, tp])) +@pytest.mark.filterwarnings( + "ignore:The number of unique classes is greater than 50% of the number of samples.*:UserWarning" +) @pytest.mark.parametrize("n_samples", [50, 3000, stress_param(500000)]) @pytest.mark.parametrize("dtype", [np.int32, np.int64, np.float32]) @pytest.mark.parametrize("problem_type", ["binary", "multiclass"]) @@ -1104,6 +1107,15 @@ def prep_dense_array(array, metric, col_major=0): return np.asfortranarray(array) if col_major else array +@pytest.mark.filterwarnings( + "ignore:X was converted to boolean for metric russellrao:UserWarning" +) +@pytest.mark.filterwarnings( + "ignore:Y was converted to boolean for metric russellrao:UserWarning" +) +@pytest.mark.filterwarnings( + "ignore:Data was converted to boolean for metric russellrao:sklearn.exceptions.DataConversionWarning" +) @pytest.mark.parametrize("metric", PAIRWISE_DISTANCE_METRICS.keys()) @pytest.mark.parametrize( "matrix_size", [(5, 4), (1000, 3), (2, 10), (500, 400)] @@ -1176,6 +1188,15 @@ def test_pairwise_distances(metric: str, matrix_size, is_col_major): pairwise_distances(X, Y, metric=metric.capitalize()) +@pytest.mark.filterwarnings( + "ignore:X was converted to boolean for metric russellrao:UserWarning" +) +@pytest.mark.filterwarnings( + "ignore:Y was converted to boolean for metric russellrao:UserWarning" +) +@pytest.mark.filterwarnings( + "ignore:Data was converted to boolean for metric russellrao:sklearn.exceptions.DataConversionWarning" +) @pytest.mark.parametrize("metric", PAIRWISE_DISTANCE_METRICS.keys()) @pytest.mark.parametrize( "matrix_size", @@ -1223,6 +1244,15 @@ def test_pairwise_distances_sklearn_comparison(metric: str, matrix_size): cp.testing.assert_array_almost_equal(S, S2, decimal=compare_precision) +@pytest.mark.filterwarnings( + "ignore:X was converted to boolean for metric russellrao:UserWarning" +) +@pytest.mark.filterwarnings( + "ignore:Y was converted to boolean for metric russellrao:UserWarning" +) +@pytest.mark.filterwarnings( + "ignore:Data was converted to boolean for metric russellrao:sklearn.exceptions.DataConversionWarning" +) @pytest.mark.parametrize("metric", PAIRWISE_DISTANCE_METRICS.keys()) def test_pairwise_distances_one_dimension_order(metric: str): # Test the pairwise_distance helper function for 1 dimensional cases which diff --git a/python/cuml/tests/test_naive_bayes.py b/python/cuml/tests/test_naive_bayes.py index b0b1d7a875..a71965ab24 100644 --- a/python/cuml/tests/test_naive_bayes.py +++ b/python/cuml/tests/test_naive_bayes.py @@ -496,6 +496,10 @@ def test_gaussian_parameters(priors, var_smoothing, nlp_20news): assert_array_equal(y_hat, y_hat_sk) +@pytest.mark.filterwarnings("ignore:X dtype is not int32.*:UserWarning") +@pytest.mark.filterwarnings( + "ignore:Changing the sparsity structure of a csr_matrix is expensive.*:scipy.sparse._base.SparseEfficiencyWarning" +) @pytest.mark.parametrize("x_dtype", [cp.int32, cp.float32, cp.float64]) @pytest.mark.parametrize("y_dtype", [cp.int32, cp.int64]) @pytest.mark.parametrize("is_sparse", [True, False]) @@ -535,6 +539,10 @@ def test_categorical(x_dtype, y_dtype, is_sparse, nlp_20news): assert sk_score - THRES <= cuml_score <= sk_score + THRES +@pytest.mark.filterwarnings("ignore:X dtype is not int32.*:UserWarning") +@pytest.mark.filterwarnings( + "ignore:Changing the sparsity structure of a csr_matrix is expensive.*:scipy.sparse._base.SparseEfficiencyWarning" +) @pytest.mark.parametrize("x_dtype", [cp.int32, cp.float32, cp.float64]) @pytest.mark.parametrize("y_dtype", [cp.int32, cp.int64]) @pytest.mark.parametrize("is_sparse", [True, False]) @@ -579,6 +587,10 @@ def test_categorical_partial_fit(x_dtype, y_dtype, is_sparse, nlp_20news): assert expected_score - THRES <= cuml_score <= expected_score + THRES +@pytest.mark.filterwarnings("ignore:X dtype is not int32.*:UserWarning") +@pytest.mark.filterwarnings( + "ignore:Changing the sparsity structure of a csr_matrix is expensive.*:scipy.sparse._base.SparseEfficiencyWarning" +) @pytest.mark.parametrize("class_prior", [None, "balanced", "unbalanced"]) @pytest.mark.parametrize("alpha", [0.1, 0.5, 1.5]) @pytest.mark.parametrize("fit_prior", [False, True]) diff --git a/python/cuml/tests/test_nearest_neighbors.py b/python/cuml/tests/test_nearest_neighbors.py index 949e8a2cdb..ef6de85646 100644 --- a/python/cuml/tests/test_nearest_neighbors.py +++ b/python/cuml/tests/test_nearest_neighbors.py @@ -378,6 +378,9 @@ def test_knn_separate_index_search(input_type, nrows, n_feats, k, metric): @pytest.mark.parametrize("input_type", ["dataframe", "ndarray"]) @pytest.mark.parametrize("nrows", [unit_param(500), stress_param(70000)]) +@pytest.mark.filterwarnings( + "ignore:algorithm='rbc' requires sqrt\\(n_samples\\) >= n_neighbors.*:UserWarning" +) @pytest.mark.parametrize("n_feats", [unit_param(3), stress_param(1000)]) @pytest.mark.parametrize( "k", [unit_param(3), unit_param(35), stress_param(50)] diff --git a/python/cuml/tests/test_preprocessing.py b/python/cuml/tests/test_preprocessing.py index ef13e8ad34..bdb9571309 100644 --- a/python/cuml/tests/test_preprocessing.py +++ b/python/cuml/tests/test_preprocessing.py @@ -917,6 +917,9 @@ def test_function_transformer_sparse(sparse_clf_dataset): # noqa: F811 assert_allclose(r_X, sk_r_X) +@pytest.mark.filterwarnings( + "ignore:'ignore_implicit_zeros' takes effect only with sparse matrix.*:UserWarning" +) @pytest.mark.parametrize("n_quantiles", [30, 100]) @pytest.mark.parametrize("output_distribution", ["uniform", "normal"]) @pytest.mark.parametrize("ignore_implicit_zeros", [False, True]) @@ -1033,6 +1036,12 @@ def test_quantile_transformer_sparse( assert_allclose(r_X, sk_r_X) +@pytest.mark.filterwarnings( + "ignore:'ignore_implicit_zeros' takes effect only with sparse matrix.*:UserWarning" +) +@pytest.mark.filterwarnings( + "ignore:n_quantiles .* is greater than the total number of samples.*:UserWarning" +) @pytest.mark.parametrize("axis", [0, 1]) @pytest.mark.parametrize("n_quantiles", [30, 100]) @pytest.mark.parametrize("output_distribution", ["uniform", "normal"]) diff --git a/python/cuml/tests/test_sklearn_compatibility.py b/python/cuml/tests/test_sklearn_compatibility.py index 1cd16d3735..d23b85b9f3 100644 --- a/python/cuml/tests/test_sklearn_compatibility.py +++ b/python/cuml/tests/test_sklearn_compatibility.py @@ -840,6 +840,21 @@ def _check_name(check): ], expected_failed_checks=get_xfails, ) +@pytest.mark.filterwarnings( + "ignore:ValueError occurred during set_params.*:UserWarning" +) +@pytest.mark.filterwarnings( + "ignore:TypeError occurred during set_params.*:UserWarning" +) +@pytest.mark.filterwarnings( + "ignore:perplexity.*should be less than n_samples.*:UserWarning" +) +@pytest.mark.filterwarnings( + "ignore:Estimator's parameters changed after set_params raised ValueError:UserWarning" +) +@pytest.mark.filterwarnings("ignore:Changing solver to 'svd'.*:UserWarning") +@pytest.mark.filterwarnings("ignore:The number of bins.*:UserWarning") +@pytest.mark.filterwarnings("ignore::pytest.PytestUnraisableExceptionWarning") def test_sklearn_compatible_estimator(estimator, check): # Check that all estimators pass the "common estimator" checks # provided by scikit-learn diff --git a/python/cuml/tests/test_text_feature_extraction.py b/python/cuml/tests/test_text_feature_extraction.py index 016479df4e..e50a70705b 100644 --- a/python/cuml/tests/test_text_feature_extraction.py +++ b/python/cuml/tests/test_text_feature_extraction.py @@ -473,6 +473,9 @@ def test_hashingvectorizer(): @pytest.mark.xfail +@pytest.mark.filterwarnings( + "ignore:The parameter 'token_pattern' will not be used:UserWarning:sklearn" +) def test_vectorizer_empty_token_case(): """ We ignore empty tokens right now but sklearn treats them as a character diff --git a/python/cuml/tests/test_trustworthiness.py b/python/cuml/tests/test_trustworthiness.py index 54d42ec269..969b269565 100644 --- a/python/cuml/tests/test_trustworthiness.py +++ b/python/cuml/tests/test_trustworthiness.py @@ -27,6 +27,9 @@ @pytest.mark.parametrize("n_features", [10, 100]) @pytest.mark.parametrize("n_components", [2, 8]) @pytest.mark.parametrize("batch_size", [128, 1024]) +@pytest.mark.filterwarnings( + "ignore:n_jobs value.*overridden.*by setting random_state.*:UserWarning" +) def test_trustworthiness( input_type, n_samples, n_features, n_components, batch_size ): diff --git a/python/cuml/tests/test_umap.py b/python/cuml/tests/test_umap.py index f2fa3690cb..ff1e7e7e43 100644 --- a/python/cuml/tests/test_umap.py +++ b/python/cuml/tests/test_umap.py @@ -683,6 +683,12 @@ def correctness_sparse(a, b, atol=0.1, rtol=0.2, threshold=0.95): @pytest.mark.parametrize("n_rows", [200, 800]) @pytest.mark.parametrize("n_features", [8, 32]) @pytest.mark.parametrize("n_neighbors", [8, 16]) +@pytest.mark.filterwarnings( + "ignore:Spectral initialisation failed.*:UserWarning" +) +@pytest.mark.filterwarnings( + "ignore:Graph is not fully connected.*:UserWarning" +) def test_fuzzy_simplicial_set(n_rows, n_features, n_neighbors): n_clusters = 30 random_state = 42 @@ -738,6 +744,9 @@ def test_fuzzy_simplicial_set(n_rows, n_features, n_neighbors): ("canberra", "nn_descent", False), ], ) +@pytest.mark.filterwarnings( + "ignore:gradient function is not yet implemented.*:UserWarning" +) def test_umap_distance_metrics_fit_transform_trust( metric, build_algo, supported ): @@ -794,6 +803,9 @@ def test_umap_distance_metrics_fit_transform_trust( ("canberra", True, True), ], ) +@pytest.mark.filterwarnings( + "ignore:gradient function is not yet implemented.*:UserWarning" +) def test_umap_distance_metrics_fit_transform_trust_on_sparse_input( metric, supported, umap_learn_supported ):