diff --git a/ci/cudf_pandas_scripts/fetch_pandas_versions.py b/ci/cudf_pandas_scripts/fetch_pandas_versions.py
new file mode 100644
index 00000000000..b6913f947e8
--- /dev/null
+++ b/ci/cudf_pandas_scripts/fetch_pandas_versions.py
@@ -0,0 +1,50 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+import requests
+from packaging.version import Version
+from packaging.specifiers import SpecifierSet
+import argparse
+
+def get_pandas_versions(pandas_range):
+    """Return the pandas minor versions on PyPI that satisfy a requirement.
+
+    Parameters
+    ----------
+    pandas_range : str
+        A version constraint such as ``"pandas>=2.0,<2.2.3dev0"``. The leading
+        ``pandas`` package name is optional.
+
+    Returns
+    -------
+    list of str
+        Unique ``"major.minor"`` strings, sorted in ascending version order,
+        for the releases listed on PyPI that fall inside the constraint.
+
+    Raises
+    ------
+    requests.RequestException
+        If the request fails, times out, or PyPI answers with an error status.
+    """
+    url = "https://pypi.org/pypi/pandas/json"
+    # Bound the request so a stalled connection cannot hang the CI job, and
+    # surface HTTP errors here rather than as a confusing JSON/KeyError below.
+    response = requests.get(url, timeout=30)
+    response.raise_for_status()
+    data = response.json()
+    versions = [Version(v) for v in data['releases']]
+    # Drop the literal package name. str.lstrip("pandas") would instead strip
+    # any leading run of the characters p/a/n/d/s, which is not a prefix match.
+    prefix = "pandas"
+    if pandas_range.startswith(prefix):
+        pandas_range = pandas_range[len(prefix):]
+    specifier = SpecifierSet(pandas_range)
+    matching_minors = {f"{v.major}.{v.minor}" for v in versions if v in specifier}
+    return sorted(matching_minors, key=Version)
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="List the pandas minor versions on PyPI that satisfy a version range.")
+    parser.add_argument("pandas_range", type=str, help="Version constraint to filter by, e.g. 'pandas>=2.0,<2.2.3dev0'.")
+    args = parser.parse_args()
+
+    versions = get_pandas_versions(args.pandas_range)
+    print(','.join(versions))
diff --git a/ci/cudf_pandas_scripts/run_tests.sh b/ci/cudf_pandas_scripts/run_tests.sh
index 8215ce729b3..5bfc083bcd3 100755
--- a/ci/cudf_pandas_scripts/run_tests.sh
+++ b/ci/cudf_pandas_scripts/run_tests.sh
@@ -9,13 +9,20 @@ RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}
 RAPIDS_COVERAGE_DIR=${RAPIDS_COVERAGE_DIR:-"${PWD}/coverage-results"}
 mkdir -p "${RAPIDS_TESTS_DIR}" "${RAPIDS_COVERAGE_DIR}"
 
+DEPENDENCIES_PATH="dependencies.yaml"
+package_name="pandas"
+
+# Take the first pandas constraint (e.g. "pandas>=2.0,<2.2.3dev0") found in the dependencies file
+pandas_version_constraint=$(grep -m 1 -oP "pandas>=\d+\.\d+,\<\d+\.\d+\.\d+dev\d+" "$DEPENDENCIES_PATH")
+
 # Function to display script usage
 function display_usage {
-    echo "Usage: $0 [--no-cudf]"
+    echo "Usage: $0 [--no-cudf] [pandas-version]"
 }
 
 # Default value for the --no-cudf option
 no_cudf=false
+PANDAS_VERSION=""
 
 # Parse command-line arguments
 while [[ $# -gt 0 ]]; do
@@ -25,9 +32,14 @@ while [[ $# -gt 0 ]]; do
             shift
             ;;
         *)
-            echo "Error: Unknown option $1"
-            display_usage
-            exit 1
+            if [[ -z "$PANDAS_VERSION" ]]; then
+                PANDAS_VERSION=$1
+                shift
+            else
+                echo "Error: Unknown option $1"
+                display_usage
+                exit 1
+            fi
             ;;
     esac
 done
@@ -53,3 +65,20 @@ python -m pytest -p cudf.pandas \
     --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-pandas-coverage.xml" \
     --cov-report=term \
     ./python/cudf/cudf_pandas_tests/
+
+output=$(python ci/cudf_pandas_scripts/fetch_pandas_versions.py "$pandas_version_constraint")
+
+# Convert the comma-separated list into an array
+IFS=',' read -r -a versions <<< "$output"
+
+for version in "${versions[@]}"; do
+    echo "Installing pandas version: ${version}"
+    # ${version} is major.minor; ".*" installs that series' latest patch instead of exactly X.Y.0
+    python -m pip install "pandas==${version}.*"
+    python -m pytest -p cudf.pandas \
+        --cov-config=./python/cudf/.coveragerc \
+        --cov=cudf \
+        --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-pandas-coverage.xml" \
+        --cov-report=term \
+        ./python/cudf/cudf_pandas_tests/
+done
diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
index 6292022d8e4..028f5f173ac 100644
--- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
+++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
@@ -42,6 +42,8 @@
     get_calendar,
 )
 
+from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION
+
 # Accelerated pandas has the real pandas and cudf modules as attributes
 pd = xpd._fsproxy_slow
 cudf = xpd._fsproxy_fast
@@ -607,6 +609,10 @@ def test_array_function_series_fallback(series):
     tm.assert_equal(expect, got)
 
 
+@pytest.mark.xfail(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Fails in older versions of pandas",
+)
 def test_timedeltaproperties(series):
     psr, sr = series
     psr, sr = psr.astype("timedelta64[ns]"), sr.astype("timedelta64[ns]")
@@ -666,6 +672,10 @@ def test_maintain_container_subclasses(multiindex):
     assert isinstance(got, xpd.core.indexes.frozen.FrozenList)
 
 
+@pytest.mark.xfail(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="Fails in older versions of pandas due to unsupported boxcar window type",
+)
 def test_rolling_win_type():
     pdf = pd.DataFrame(range(5))
     df = xpd.DataFrame(range(5))
@@ -1281,6 +1291,10 @@ def max_times_two(self):
     assert s.max_times_two() == 6
 
 
+@pytest.mark.xfail(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="DatetimeArray.__floordiv__ missing in pandas-2.0.0",
+)
 def test_floordiv_array_vs_df():
     xarray = xpd.Series([1, 2, 3], dtype="datetime64[ns]").array
     parray = pd.Series([1, 2, 3], dtype="datetime64[ns]").array
@@ -1552,6 +1566,10 @@ def test_numpy_cupy_flatiter(series):
     assert type(arr.flat._fsproxy_slow) == np.flatiter
 
 
+@pytest.mark.xfail(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="pyarrow_numpy storage type was not supported in pandas-2.0.0",
+)
 def test_arrow_string_arrays():
     cu_s = xpd.Series(["a", "b", "c"])
     pd_s = pd.Series(["a", "b", "c"])
diff --git a/python/cudf/cudf_pandas_tests/test_profiler.py b/python/cudf/cudf_pandas_tests/test_profiler.py
index 588398265f2..5b7bde06d1d 100644
--- a/python/cudf/cudf_pandas_tests/test_profiler.py
+++ b/python/cudf/cudf_pandas_tests/test_profiler.py
@@ -5,6 +5,8 @@
 import os
 import subprocess
 
+import pytest
+
 from cudf.pandas import LOADED, Profiler
 
 if not LOADED:
@@ -13,7 +15,13 @@
 import numpy as np
 import pandas as pd
 
+from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION
+
 
+@pytest.mark.skipif(
+    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    reason="function names change across versions of pandas, so making sure it only runs on latest version of pandas",
+)
 def test_profiler():
     np.random.seed(42)
     with Profiler() as profiler: