diff --git a/pyproject.toml b/pyproject.toml
index 01281a9ce72..90a2b8505f2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -254,6 +254,7 @@ extend-select = [
   "ISC", # flake8-implicit-str-concat
   "PIE", # flake8-pie
   "TID", # flake8-tidy-imports (absolute imports)
+  "PYI", # flake8-pyi
   "I", # isort
   "PERF", # Perflint
   "W", # pycodestyle warnings
@@ -268,6 +269,8 @@ extend-safe-fixes = [
 ignore = [
   "C40", # unnecessary generator, comprehension, or literal
   "PIE790", # unnecessary pass statement
+  "PYI019", # use `Self` instead of custom TypeVar
+  "PYI041", # use `float` instead of `int | float`
   "PERF203", # try-except within a loop incurs performance overhead
   "E402", # module level import not at top of file
   "E731", # do not assign a lambda expression, use a def
@@ -283,6 +286,9 @@ ignore = [
 [tool.ruff.lint.per-file-ignores]
 # don't enforce absolute imports
 "asv_bench/**" = ["TID252"]
+# looks like ruff bugs
+"xarray/core/_typed_ops.py" = ["PYI034"]
+"xarray/namedarray/_typing.py" = ["PYI018", "PYI046"]
 
 [tool.ruff.lint.isort]
 known-first-party = ["xarray"]
diff --git a/xarray/backends/api.py b/xarray/backends/api.py
index f30f4e54705..cf6d8e525e5 100644
--- a/xarray/backends/api.py
+++ b/xarray/backends/api.py
@@ -71,7 +71,7 @@
 T_NetcdfEngine = Literal["netcdf4", "scipy", "h5netcdf"]
 T_Engine = Union[
     T_NetcdfEngine,
-    Literal["pydap", "zarr"],
+    Literal["pydap", "zarr"],  # noqa: PYI051
     type[BackendEntrypoint],
     str,  # no nice typing support for custom backends
     None,
@@ -710,8 +710,8 @@ def open_dataset(
 def open_dataarray(
     filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
     *,
-    engine: T_Engine | None = None,
-    chunks: T_Chunks | None = None,
+    engine: T_Engine = None,
+    chunks: T_Chunks = None,
     cache: bool | None = None,
     decode_cf: bool | None = None,
     mask_and_scale: bool | None = None,
@@ -1394,7 +1394,7 @@ def open_mfdataset(
     | os.PathLike
     | ReadBuffer
     | NestedSequence[str | os.PathLike | ReadBuffer],
-    chunks: T_Chunks | None = None,
+    chunks: T_Chunks = None,
     concat_dim: (
         str
         | DataArray
@@ -1406,7 +1406,7 @@
     ) = None,
     compat: CompatOptions = "no_conflicts",
     preprocess: Callable[[Dataset], Dataset] | None = None,
-    engine: T_Engine | None = None,
+    engine: T_Engine = None,
     data_vars: Literal["all", "minimal", "different"] | list[str] = "all",
     coords="different",
     combine: Literal["by_coords", "nested"] = "by_coords",
diff --git a/xarray/computation/apply_ufunc.py b/xarray/computation/apply_ufunc.py
index 41f4adb5fd5..1d7ceefb859 100644
--- a/xarray/computation/apply_ufunc.py
+++ b/xarray/computation/apply_ufunc.py
@@ -16,16 +16,14 @@
     Iterator,
     Mapping,
     Sequence,
-    Set,
 )
-from typing import TYPE_CHECKING, Any, Literal, TypeVar, Union
+from collections.abc import (
+    Set as AbstractSet,
+)
+from typing import TYPE_CHECKING, Any, Literal
 
 import numpy as np
 
-_T = TypeVar("_T", bound=Union["Dataset", "DataArray"])
-_U = TypeVar("_U", bound=Union["Dataset", "DataArray"])
-_V = TypeVar("_V", bound=Union["Dataset", "DataArray"])
-
 from xarray.core import duck_array_ops, utils
 from xarray.core.formatting import limit_lines
 from xarray.core.indexes import Index, filter_indexes_from_coords
@@ -200,7 +198,7 @@ def _get_coords_list(args: Iterable[Any]) -> list[Coordinates]:
 def build_output_coords_and_indexes(
     args: Iterable[Any],
     signature: _UFuncSignature,
-    exclude_dims: Set = frozenset(),
+    exclude_dims: AbstractSet = frozenset(),
     combine_attrs: CombineAttrsOptions = "override",
 ) -> tuple[list[dict[Any, Variable]], list[dict[Any, Index]]]:
     """Build output coordinates and indexes for an operation.
@@ -616,7 +614,7 @@ def apply_groupby_func(func, *args):
 
 
 def unified_dim_sizes(
-    variables: Iterable[Variable], exclude_dims: Set = frozenset()
+    variables: Iterable[Variable], exclude_dims: AbstractSet = frozenset()
 ) -> dict[Hashable, int]:
     dim_sizes: dict[Hashable, int] = {}
 
@@ -896,7 +894,7 @@ def apply_ufunc(
     *args: Any,
     input_core_dims: Sequence[Sequence] | None = None,
     output_core_dims: Sequence[Sequence] | None = ((),),
-    exclude_dims: Set = frozenset(),
+    exclude_dims: AbstractSet = frozenset(),
     vectorize: bool = False,
     join: JoinOptions = "exact",
     dataset_join: str = "exact",
diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py
index 72b9710372f..0a5f34a3c46 100644
--- a/xarray/core/accessor_dt.py
+++ b/xarray/core/accessor_dt.py
@@ -5,6 +5,7 @@
 
 import numpy as np
 import pandas as pd
+from typing_extensions import Self
 
 from xarray.coding.calendar_ops import _decimal_year
 from xarray.coding.times import infer_calendar_name
@@ -650,7 +651,7 @@ def total_seconds(self) -> T_DataArray:
 class CombinedDatetimelikeAccessor(
     DatetimeAccessor[T_DataArray], TimedeltaAccessor[T_DataArray]
 ):
-    def __new__(cls, obj: T_DataArray) -> CombinedDatetimelikeAccessor:
+    def __new__(cls, obj: T_DataArray) -> Self:
         # CombinedDatetimelikeAccessor isn't really instantiated. Instead
         # we need to choose which parent (datetime or timedelta) is
         # appropriate. Since we're checking the dtypes anyway, we'll just
diff --git a/xarray/core/datatree_render.py b/xarray/core/datatree_render.py
index f1042d9eeef..b88b4d7162e 100644
--- a/xarray/core/datatree_render.py
+++ b/xarray/core/datatree_render.py
@@ -8,15 +8,18 @@
 
 from __future__ import annotations
 
-from collections import namedtuple
 from collections.abc import Iterable, Iterator
 from math import ceil
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, NamedTuple
 
 if TYPE_CHECKING:
     from xarray.core.datatree import DataTree
 
-Row = namedtuple("Row", ("pre", "fill", "node"))
+
+class Row(NamedTuple):
+    pre: str
+    fill: str
+    node: DataTree | str
 
 
 class AbstractStyle:
diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py
index b9abf4fef2d..70c950a858a 100644
--- a/xarray/core/indexes.py
+++ b/xarray/core/indexes.py
@@ -1657,7 +1657,7 @@ class Indexes(collections.abc.Mapping, Generic[T_PandasOrXarrayIndex]):
 
     """
 
-    _index_type: type[Index] | type[pd.Index]
+    _index_type: type[Index | pd.Index]
     _indexes: dict[Any, T_PandasOrXarrayIndex]
     _variables: dict[Any, Variable]
 
@@ -1675,7 +1675,7 @@ def __init__(
         self,
         indexes: Mapping[Any, T_PandasOrXarrayIndex] | None = None,
         variables: Mapping[Any, Variable] | None = None,
-        index_type: type[Index] | type[pd.Index] = Index,
+        index_type: type[Index | pd.Index] = Index,
     ):
         """Constructor not for public consumption.
 
diff --git a/xarray/core/types.py b/xarray/core/types.py
index 5c7c8eaafd0..1e5ae9aa342 100644
--- a/xarray/core/types.py
+++ b/xarray/core/types.py
@@ -214,7 +214,7 @@ def copy(
 
 # FYI in some cases we don't allow `None`, which this doesn't take account of.
 # FYI the `str` is for a size string, e.g. "16MB", supported by dask.
-T_ChunkDim: TypeAlias = str | int | Literal["auto"] | None | tuple[int, ...]
+T_ChunkDim: TypeAlias = str | int | Literal["auto"] | None | tuple[int, ...]  # noqa: PYI051
 T_ChunkDimFreq: TypeAlias = Union["TimeResampler", T_ChunkDim]
 T_ChunksFreq: TypeAlias = T_ChunkDim | Mapping[Any, T_ChunkDimFreq]
 # We allow the tuple form of this (though arguably we could transition to named dims only)
@@ -329,7 +329,7 @@ def mode(self) -> str:
         # for _get_filepath_or_buffer
         ...
 
-    def seek(self, __offset: int, __whence: int = ...) -> int:
+    def seek(self, offset: int, whence: int = ..., /) -> int:
         # with one argument: gzip.GzipFile, bz2.BZ2File
         # with two arguments: zip.ZipFile, read_sas
         ...
@@ -345,7 +345,7 @@ def tell(self) -> int:
 
 @runtime_checkable
 class ReadBuffer(BaseBuffer, Protocol[AnyStr_co]):
-    def read(self, __n: int = ...) -> AnyStr_co:
+    def read(self, n: int = ..., /) -> AnyStr_co:
         # for BytesIOWrapper, gzip.GzipFile, bz2.BZ2File
         ...
 
diff --git a/xarray/core/utils.py b/xarray/core/utils.py
index 7ede0bb9f08..c792e4ce60f 100644
--- a/xarray/core/utils.py
+++ b/xarray/core/utils.py
@@ -61,9 +61,11 @@
     MutableMapping,
     MutableSet,
     Sequence,
-    Set,
     ValuesView,
 )
+from collections.abc import (
+    Set as AbstractSet,
+)
 from enum import Enum
 from pathlib import Path
 from types import EllipsisType, ModuleType
@@ -1055,7 +1057,7 @@ def parse_ordered_dims(
     )
 
 
-def _check_dims(dim: Set[Hashable], all_dims: Set[Hashable]) -> None:
+def _check_dims(dim: AbstractSet[Hashable], all_dims: AbstractSet[Hashable]) -> None:
     wrong_dims = (dim - all_dims) - {...}
     if wrong_dims:
         wrong_dims_str = ", ".join(f"'{d}'" for d in wrong_dims)
diff --git a/xarray/groupers.py b/xarray/groupers.py
index 0ae8e3c87cd..46dc47582a3 100644
--- a/xarray/groupers.py
+++ b/xarray/groupers.py
@@ -902,7 +902,7 @@ def factorize(self, group: T_Group) -> EncodedGroups:
         first_items = agged["first"]
         counts = agged["count"]
 
-        index_class: type[CFTimeIndex] | type[pd.DatetimeIndex]
+        index_class: type[CFTimeIndex | pd.DatetimeIndex]
         if _contains_cftime_datetimes(group.data):
             index_class = CFTimeIndex
             datetime_class = type(first_n_items(group.data, 1).item())
diff --git a/xarray/namedarray/_typing.py b/xarray/namedarray/_typing.py
index 95e7d7adfc3..2dba06a5d44 100644
--- a/xarray/namedarray/_typing.py
+++ b/xarray/namedarray/_typing.py
@@ -39,7 +39,6 @@ class Default(Enum):
 _default = Default.token
 
 # https://stackoverflow.com/questions/74633074/how-to-type-hint-a-generic-numpy-array
-_T = TypeVar("_T")
 _T_co = TypeVar("_T_co", covariant=True)
 
 _dtype = np.dtype
@@ -79,7 +78,7 @@ def dtype(self) -> _DType_co: ...
 _NormalizedChunks = tuple[tuple[int, ...], ...]
 # FYI in some cases we don't allow `None`, which this doesn't take account of.
 # FYI the `str` is for a size string, e.g. "16MB", supported by dask.
-T_ChunkDim: TypeAlias = str | int | Literal["auto"] | None | tuple[int, ...]
+T_ChunkDim: TypeAlias = str | int | Literal["auto"] | None | tuple[int, ...]  # noqa: PYI051
 # We allow the tuple form of this (though arguably we could transition to named dims only)
 T_Chunks: TypeAlias = T_ChunkDim | Mapping[Any, T_ChunkDim]
 
diff --git a/xarray/namedarray/dtypes.py b/xarray/namedarray/dtypes.py
index a29fbdfd41c..a49f7686179 100644
--- a/xarray/namedarray/dtypes.py
+++ b/xarray/namedarray/dtypes.py
@@ -13,19 +13,19 @@
 
 @functools.total_ordering
 class AlwaysGreaterThan:
-    def __gt__(self, other: Any) -> Literal[True]:
+    def __gt__(self, other: object) -> Literal[True]:
         return True
 
-    def __eq__(self, other: Any) -> bool:
+    def __eq__(self, other: object) -> bool:
         return isinstance(other, type(self))
 
 
 @functools.total_ordering
 class AlwaysLessThan:
-    def __lt__(self, other: Any) -> Literal[True]:
+    def __lt__(self, other: object) -> Literal[True]:
         return True
 
-    def __eq__(self, other: Any) -> bool:
+    def __eq__(self, other: object) -> bool:
         return isinstance(other, type(self))
 
 
diff --git a/xarray/structure/chunks.py b/xarray/structure/chunks.py
index 2c993137996..e6dcd7b8b83 100644
--- a/xarray/structure/chunks.py
+++ b/xarray/structure/chunks.py
@@ -167,15 +167,15 @@ def _maybe_chunk(
 
 
 @overload
-def unify_chunks(__obj: _T) -> tuple[_T]: ...
+def unify_chunks(obj: _T, /) -> tuple[_T]: ...
 
 
 @overload
-def unify_chunks(__obj1: _T, __obj2: _U) -> tuple[_T, _U]: ...
+def unify_chunks(obj1: _T, obj2: _U, /) -> tuple[_T, _U]: ...
 
 
 @overload
-def unify_chunks(__obj1: _T, __obj2: _U, __obj3: _V) -> tuple[_T, _U, _V]: ...
+def unify_chunks(obj1: _T, obj2: _U, obj3: _V, /) -> tuple[_T, _U, _V]: ...
 
 
 @overload
diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py
index ca1e5ccb2bb..b2a459ba652 100644
--- a/xarray/structure/merge.py
+++ b/xarray/structure/merge.py
@@ -1,7 +1,8 @@
 from __future__ import annotations
 
 from collections import defaultdict
-from collections.abc import Hashable, Iterable, Mapping, Sequence, Set
+from collections.abc import Hashable, Iterable, Mapping, Sequence
+from collections.abc import Set as AbstractSet
 from typing import TYPE_CHECKING, Any, NamedTuple, Union
 
 import pandas as pd
@@ -396,7 +397,7 @@ def collect_from_coordinates(
 def merge_coordinates_without_align(
     objects: list[Coordinates],
     prioritized: Mapping[Any, MergeElement] | None = None,
-    exclude_dims: Set = frozenset(),
+    exclude_dims: AbstractSet = frozenset(),
     combine_attrs: CombineAttrsOptions = "override",
 ) -> tuple[dict[Hashable, Variable], dict[Hashable, Index]]:
     """Merge variables/indexes from coordinates without automatic alignments.
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 6a14142a7ff..9a6c2248233 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -6215,9 +6215,7 @@ def test_h5netcdf_entrypoint(tmp_path: Path) -> None:
 
 @requires_netCDF4
 @pytest.mark.parametrize("str_type", (str, np.str_))
-def test_write_file_from_np_str(
-    str_type: type[str] | type[np.str_], tmpdir: str
-) -> None:
+def test_write_file_from_np_str(str_type: type[str | np.str_], tmpdir: str) -> None:
     # https://github.com/pydata/xarray/pull/5264
     scenarios = [str_type(v) for v in ["scenario_a", "scenario_b", "scenario_c"]]
     years = range(2015, 2100 + 1)
diff --git a/xarray/util/generate_aggregations.py b/xarray/util/generate_aggregations.py
index 089ef558581..8812a1abb22 100644
--- a/xarray/util/generate_aggregations.py
+++ b/xarray/util/generate_aggregations.py
@@ -13,9 +13,9 @@
 
 """
 
-import collections
 import textwrap
 from dataclasses import dataclass, field
+from typing import NamedTuple
 
 MODULE_PREAMBLE = '''\
 """Mixin classes with reduction operations."""
@@ -227,7 +227,14 @@ def {method}(
 and better supported. ``cumsum`` and ``cumprod`` may be deprecated
 in the future."""
 
-ExtraKwarg = collections.namedtuple("ExtraKwarg", "docs kwarg call example")
+
+class ExtraKwarg(NamedTuple):
+    docs: str
+    kwarg: str
+    call: str
+    example: str
+
+
 skipna = ExtraKwarg(
     docs=_SKIPNA_DOCSTRING,
     kwarg="skipna: bool | None = None,",