diff --git a/xarray/coding/calendar_ops.py b/xarray/coding/calendar_ops.py index 4d223104ade..52a487ca46d 100644 --- a/xarray/coding/calendar_ops.py +++ b/xarray/coding/calendar_ops.py @@ -1,7 +1,5 @@ from __future__ import annotations -import warnings - import numpy as np import pandas as pd @@ -306,30 +304,19 @@ def _convert_to_new_calendar_with_new_day_of_year( return np.nan -def _yearstart_cftime(year, date_class): - return date_class(year, 1, 1) - +def _decimal_year_cftime(time, year, days_in_year, *, date_class): + year_start = date_class(year, 1, 1) + delta = np.timedelta64(time - year_start, "ns") + days_in_year = np.timedelta64(days_in_year, "D") + return year + delta / days_in_year -def _yearstart_np(year, dtype): - return np.datetime64(int(year) - 1970, "Y").astype(dtype) - -def _yearstart(times): - if times.dtype == "O": - return apply_ufunc( - _yearstart_cftime, - times.dt.year, - kwargs={"date_class": get_date_type(times.dt.calendar, True)}, - vectorize=True, - dask="parallelized", - ) - return apply_ufunc( - _yearstart_np, - times.dt.year, - kwargs={"dtype": times.dtype}, - vectorize=True, - dask="parallelized", - ) +def _decimal_year_numpy(time, year, days_in_year, *, dtype): + time = np.asarray(time).astype(dtype) + year_start = np.datetime64(int(year) - 1970, "Y").astype(dtype) + delta = time - year_start + days_in_year = np.timedelta64(days_in_year, "D") + return year + delta / days_in_year def _decimal_year(times): @@ -340,12 +327,22 @@ def _decimal_year(times): Ex: '2000-03-01 12:00' is 2000.1653 in a standard calendar, 2000.16301 in a "noleap" or 2000.16806 in a "360_day". 
""" - years = times.dt.year - deltas = times - _yearstart(times) - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", message="Converting non-nanosecond") - days_in_years = times.dt.days_in_year.astype("timedelta64[D]") - return years + deltas / days_in_years + if times.dtype == "O": + function = _decimal_year_cftime + kwargs = {"date_class": get_date_type(times.dt.calendar, True)} + else: + function = _decimal_year_numpy + kwargs = {"dtype": times.dtype} + return apply_ufunc( + function, + times, + times.dt.year, + times.dt.days_in_year, + kwargs=kwargs, + vectorize=True, + dask="parallelized", + output_dtypes=[np.float64], + ) def interp_calendar(source, target, dim="time"): diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index 8bd4eee731f..e73893d0f35 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -563,7 +563,7 @@ def days_in_year(self) -> T_DataArray: @property def decimal_year(self) -> T_DataArray: """Convert the dates as a fractional year.""" - result = _decimal_year(self._obj).astype(float) + result = _decimal_year(self._obj) newvar = Variable( dims=self._obj.dims, attrs=self._obj.attrs, diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index 8e8821267aa..587f43a5d7f 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -153,14 +153,6 @@ def test_days_in_year(self, calendar, expected) -> None: == expected ).all() - @requires_cftime - def test_decimal_year(self) -> None: - h_per_yr = 366 * 24 - np.testing.assert_array_equal( - self.data.time.dt.decimal_year[0:3], - [2000, 2000 + 1 / h_per_yr, 2000 + 2 / h_per_yr], - ) - def test_not_datetime_type(self) -> None: nontime_data = self.data.copy() int_data = np.arange(len(self.data.time)).astype("int8") @@ -197,7 +189,6 @@ def test_not_datetime_type(self) -> None: "is_year_end", "is_leap_year", "days_in_year", - "decimal_year", ], ) def test_dask_field_access(self, field) -> None: @@ 
@pytest.mark.parametrize(
    "use_cftime",
    [False, pytest.param(True, marks=requires_cftime)],
    ids=lambda x: f"use_cftime={x}",
)
@pytest.mark.parametrize(
    "use_dask",
    [False, pytest.param(True, marks=requires_dask)],
    ids=lambda x: f"use_dask={x}",
)
def test_decimal_year(use_cftime, use_dask) -> None:
    """Check ``.dt.decimal_year`` on a 2-D array of hourly timestamps.

    Runs with numpy and cftime datetimes, eagerly and dask-backed. In the
    dask case the number of computes triggered while building the result is
    capped.
    """
    year = 2000
    n_times = 10
    grid_shape = (2, 5)
    dim_names = ["x", "y"]

    timestamps = xr.date_range(
        f"{year}", periods=n_times, freq="h", use_cftime=use_cftime
    )
    da = xr.DataArray(timestamps.values.reshape(grid_shape), dims=dim_names)

    if not use_dask:
        result = da.dt.decimal_year
    else:
        da = da.chunk({"y": 2})
        # For a cftime datetime array, computing the decimal year needs a few
        # small computes (6 in total):
        # - 4x one per .dt accessor call (one object-dtype element is
        #   inspected to check that it is time-like)
        # - 2x one per calendar inference (one element is inspected to read
        #   off the calendar)
        # For numpy datetimes no compute is allowed at all.
        allowed_computes = 6 * use_cftime
        with raise_if_dask_computes(max_computes=allowed_computes):
            result = da.dt.decimal_year

    # The expectation divides by the hour count of a 366-day year.
    hours_in_year = 24 * 366
    fractions = np.arange(n_times).reshape(grid_shape) / hours_in_year
    expected = xr.DataArray(year + fractions, dims=dim_names)
    xr.testing.assert_equal(result, expected)