diff --git a/.github/workflows/ci_doctests.yaml b/.github/workflows/ci_doctests.yaml index 0a139e02d57..2beee329757 100644 --- a/.github/workflows/ci_doctests.yaml +++ b/.github/workflows/ci_doctests.yaml @@ -58,6 +58,7 @@ jobs: contextily geopandas ipython + pyarrow rioxarray build make diff --git a/doc/install.rst b/doc/install.rst index f3594e52521..504eb87a911 100644 --- a/doc/install.rst +++ b/doc/install.rst @@ -112,8 +112,8 @@ The following are optional dependencies: If you have `PyArrow `__ installed, PyGMT does have some initial support for ``pandas.Series`` and ``pandas.DataFrame`` objects with Apache Arrow-backed arrays. Specifically, - only uint/int/float dtypes are supported for now. Support for datetime and - string Arrow dtypes are still working in progress. For more details, see + only uint/int/float and date32/date64 dtypes are supported for now. Support + for string Arrow dtypes is still a work in progress. For more details, see `issue #2800 `__. Installing GMT and other dependencies diff --git a/pygmt/clib/conversion.py b/pygmt/clib/conversion.py index 04264739e8f..2ce0299ef3b 100644 --- a/pygmt/clib/conversion.py +++ b/pygmt/clib/conversion.py @@ -162,11 +162,42 @@ def vectors_to_arrays(vectors): True >>> all(isinstance(i, np.ndarray) for i in arrays) True + >>> data = [[1, 2], (3, 4), range(5, 7)] >>> all(isinstance(i, np.ndarray) for i in vectors_to_arrays(data)) True + + >>> import datetime + >>> import pytest + >>> pa = pytest.importorskip("pyarrow") + >>> vectors = [ + ... pd.Series( + ... data=[datetime.date(2020, 1, 1), datetime.date(2021, 12, 31)], + ... dtype="date32[day][pyarrow]", + ... ), + ... pd.Series( + ... data=[datetime.date(2022, 1, 1), datetime.date(2023, 12, 31)], + ... dtype="date64[ms][pyarrow]", + ... ), + ... ] + >>> arrays = vectors_to_arrays(vectors) + >>> all(a.flags.c_contiguous for a in arrays) + True + >>> all(isinstance(a, np.ndarray) for a in arrays) + True + >>> all(isinstance(a.dtype, np.dtypes.DateTime64DType) for a in arrays) + True """ - arrays = [as_c_contiguous(np.asarray(i)) for i in vectors] + dtypes = { + "date32[day][pyarrow]": np.datetime64, + "date64[ms][pyarrow]": np.datetime64, + } + arrays = [] + for vector in vectors: + vec_dtype = str(getattr(vector, "dtype", "")) + array = np.asarray(a=vector, dtype=dtypes.get(vec_dtype, None)) + arrays.append(as_c_contiguous(array)) + return arrays diff --git a/pygmt/tests/test_info.py b/pygmt/tests/test_info.py index b1c247e5bd3..3ddbbe9bca0 100644 --- a/pygmt/tests/test_info.py +++ b/pygmt/tests/test_info.py @@ -119,14 +119,22 @@ def test_info_numpy_array_time_column(): assert output == expected_output -def test_info_pandas_dataframe_time_column(): +@pytest.mark.parametrize( + "dtype", + [ + "datetime64[ns]", + pytest.param("date32[day][pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("date64[ms][pyarrow]", marks=skip_if_no(package="pyarrow")), + ], +) +def test_info_pandas_dataframe_date_column(dtype): """ - Make sure info works on pandas.DataFrame inputs with a time column. + Make sure info works on pandas.DataFrame inputs with a date column. """ table = pd.DataFrame( data={ "z": [10, 13, 12, 15, 14], - "time": pd.date_range(start="2020-01-01", periods=5), + "date": pd.date_range(start="2020-01-01", periods=5).astype(dtype=dtype), } ) output = info(data=table)