From 0aa73e69eed5a092a84b8a32fd122dccca86bec3 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 2 Dec 2024 19:14:02 +0800 Subject: [PATCH 1/2] Improve the workaround for handling pandas null dtypes in pandas<=2.1 --- pygmt/clib/conversion.py | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/pygmt/clib/conversion.py b/pygmt/clib/conversion.py index b093109cf32..ee1855b9537 100644 --- a/pygmt/clib/conversion.py +++ b/pygmt/clib/conversion.py @@ -168,24 +168,35 @@ def _to_numpy(data: Any) -> np.ndarray: "date64[ms][pyarrow]": "datetime64[ms]", } + # The expected numpy dtype for the result numpy array, but can be None. + dtype = dtypes.get(str(getattr(data, "dtype", getattr(data, "type", "")))) + + # pandas numeric dtypes were converted to np.object_ dtype until pandas 2.2, and are + # converted to suitable NumPy dtypes since pandas 2.2. Refer to the following link + # for details: https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#to-numpy-for-numpy-nullable-and-arrow-types-converts-to-suitable-numpy-dtype + # + # Workarounds for pandas < 2.2. Following SPEC 0, pandas 2.1 should be dropped in + # 2025 Q3, so it's likely we can remove the workaround in PyGMT v0.17.0. if ( - hasattr(data, "isna") - and data.isna().any() - and Version(pd.__version__) < Version("2.2") - ): - # Workaround for dealing with pd.NA with pandas < 2.2. - # Bug report at: https://github.com/GenericMappingTools/pygmt/issues/2844 - # Following SPEC0, pandas 2.1 will be dropped in 2025 Q3, so it's likely - # we can remove the workaround in PyGMT v0.17.0. - array = np.ascontiguousarray(data.astype(float)) - else: - vec_dtype = str(getattr(data, "dtype", getattr(data, "type", ""))) - array = np.ascontiguousarray(data, dtype=dtypes.get(vec_dtype)) + Version(pd.__version__) < Version("2.2") # pandas < 2.2 only. + and hasattr(data, "dtype") # NumPy array or pandas objects only. + and hasattr(data.dtype, "numpy_dtype") # pandas dtypes only. 
+ and data.dtype.kind in "iuf" # Numeric dtypes only. + ): # pandas Series/Index with pandas nullable numeric dtypes. + dtype = data.dtype.numpy_dtype # The expected numpy dtype. + if getattr(data, "hasnans", False): + if data.dtype.kind in "iu": + # Integers with missing values are converted to float64. + dtype = np.float64 + data = data.to_numpy(na_value=np.nan) + + array = np.ascontiguousarray(data, dtype=dtype) # Check if a np.object_ array can be converted to np.str_. if array.dtype == np.object_: with contextlib.suppress(TypeError, ValueError): return np.ascontiguousarray(array, dtype=np.str_) + return array From 3f7eff2d0ca3fc7b9c5689e8676220e63654c5be Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 2 Dec 2024 23:13:07 +0800 Subject: [PATCH 2/2] Separate variable 'dtype' and 'numpy_dtype' for the input and result array --- pygmt/clib/conversion.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pygmt/clib/conversion.py b/pygmt/clib/conversion.py index ee1855b9537..5a1d1cf51b9 100644 --- a/pygmt/clib/conversion.py +++ b/pygmt/clib/conversion.py @@ -168,8 +168,10 @@ def _to_numpy(data: Any) -> np.ndarray: "date64[ms][pyarrow]": "datetime64[ms]", } - # The expected numpy dtype for the result numpy array, but can be None. - dtype = dtypes.get(str(getattr(data, "dtype", getattr(data, "type", "")))) + # The dtype for the input object. + dtype = getattr(data, "dtype", getattr(data, "type", "")) + # The numpy dtype for the result numpy array, but can be None. + numpy_dtype = dtypes.get(str(dtype)) # pandas numeric dtypes were converted to np.object_ dtype until pandas 2.2, and are # converted to suitable NumPy dtypes since pandas 2.2. Refer to the following link @@ -183,20 +185,20 @@ def _to_numpy(data: Any) -> np.ndarray: and hasattr(data.dtype, "numpy_dtype") # pandas dtypes only. and data.dtype.kind in "iuf" # Numeric dtypes only. ): # pandas Series/Index with pandas nullable numeric dtypes. 
- dtype = data.dtype.numpy_dtype # The expected numpy dtype. + # The numpy dtype of the result numpy array. + numpy_dtype = data.dtype.numpy_dtype if getattr(data, "hasnans", False): if data.dtype.kind in "iu": # Integers with missing values are converted to float64. - dtype = np.float64 + numpy_dtype = np.float64 data = data.to_numpy(na_value=np.nan) - array = np.ascontiguousarray(data, dtype=dtype) + array = np.ascontiguousarray(data, dtype=numpy_dtype) # Check if a np.object_ array can be converted to np.str_. if array.dtype == np.object_: with contextlib.suppress(TypeError, ValueError): return np.ascontiguousarray(array, dtype=np.str_) - return array