Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion xarray/core/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@
# Operand aliases built from forward references to "Variable",
# "ScalarOrArray" and "GroupBy".
VarCompatible = Union["Variable", "ScalarOrArray"]
GroupByIncompatible = Union["Variable", "GroupBy"]

# Dimension argument: one name (str), an iterable of hashable names, or None.
# NOTE(review): ``Dims`` is assigned again on the very next line, which
# overrides this definition -- this looks like the pre-change side of a diff
# hunk; confirm that only one of the two is meant to remain.
Dims = Union[str, Iterable[Hashable], None]
# Same as above, additionally accepting the "ellipsis" forward reference
# (presumably the type of the ``...`` singleton -- TODO confirm).
Dims = Union[str, Iterable[Hashable], "ellipsis", None]
# Order-sensitive variant: takes a Sequence instead of an arbitrary Iterable,
# and "ellipsis" may also appear as an element of that sequence.
OrderedDims = Union[str, Sequence[Union[Hashable, "ellipsis"]], "ellipsis", None]

# Allowed values for error-handling options.
ErrorOptions = Literal["raise", "ignore"]
ErrorOptionsWithWarn = Literal["raise", "warn", "ignore"]
Expand Down
165 changes: 160 additions & 5 deletions xarray/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@
Hashable,
Iterable,
Iterator,
Literal,
Mapping,
MutableMapping,
MutableSet,
Sequence,
TypeVar,
cast,
overload,
Expand All @@ -33,7 +35,7 @@
import pandas as pd

if TYPE_CHECKING:
from .types import ErrorOptionsWithWarn
from .types import Dims, ErrorOptionsWithWarn, OrderedDims

K = TypeVar("K")
V = TypeVar("V")
Expand Down Expand Up @@ -894,15 +896,17 @@ def drop_dims_from_indexers(


def drop_missing_dims(
supplied_dims: Collection, dims: Collection, missing_dims: ErrorOptionsWithWarn
) -> Collection:
supplied_dims: Iterable[Hashable],
dims: Iterable[Hashable],
missing_dims: ErrorOptionsWithWarn,
) -> Iterable[Hashable]:
"""Depending on the setting of missing_dims, drop any dimensions from supplied_dims that
are not present in dims.

Parameters
----------
supplied_dims : dict
dims : sequence
supplied_dims : Iterable of Hashable
dims : Iterable of Hashable
missing_dims : {"raise", "warn", "ignore"}
"""

Expand Down Expand Up @@ -935,6 +939,157 @@ def drop_missing_dims(
)


# Type of the "no explicit dimensions" sentinels (``None`` or ``...``).  The
# overloads below use it so that, with ``replace_none=False``, the sentinel
# that was passed in shows up unchanged in the return type.
T_None = TypeVar("T_None", None, "ellipsis")


@overload
def parse_dims(
    dim: str | Iterable[Hashable] | T_None,
    all_dims: tuple[Hashable, ...],
    *,
    check_exists: bool = True,
    replace_none: Literal[True] = True,
) -> tuple[Hashable, ...]:
    ...


@overload
def parse_dims(
    dim: str | Iterable[Hashable] | T_None,
    all_dims: tuple[Hashable, ...],
    *,
    check_exists: bool = True,
    replace_none: Literal[False],
) -> tuple[Hashable, ...] | T_None:
    ...


def parse_dims(
    dim: Dims,
    all_dims: tuple[Hashable, ...],
    *,
    check_exists: bool = True,
    replace_none: bool = True,
) -> tuple[Hashable, ...] | None | ellipsis:
    """Parse one or more dimensions into a tuple.

    A single dimension must always be a str, multiple dimensions
    can be Hashables. This supports e.g. using a tuple as a dimension.
    The result follows the iteration order of the input, so the order is
    conserved for sequences but is arbitrary for e.g. a set.

    Parameters
    ----------
    dim : str, Iterable of Hashable, "..." or None
        Dimension(s) to parse. One-shot iterators (e.g. generators) are
        supported; they are consumed exactly once.
    all_dims : tuple of Hashable
        All possible dimensions.
    check_exists : bool, default: True
        If True, check that dim is a subset of all_dims.
    replace_none : bool, default: True
        If True, return all_dims if dim is None or "...".
        If False, None or "..." is returned unchanged.

    Returns
    -------
    parsed_dims : tuple of Hashable, None or "..."
        Input dimensions as a tuple. None or "..." is only returned when
        it was passed as ``dim`` and ``replace_none`` is False.

    Raises
    ------
    ValueError
        If ``check_exists`` is True and ``dim`` contains a dimension that is
        not in ``all_dims``.
    """
    if dim is None or dim is ...:
        if replace_none:
            return all_dims
        return dim
    # Materialize once *before* validating: building a set from a one-shot
    # iterator would exhaust it and leave nothing to return afterwards.
    dims = (dim,) if isinstance(dim, str) else tuple(dim)
    if check_exists:
        _check_dims(set(dims), set(all_dims))
    return dims


@overload
def parse_ordered_dims(
    dim: str | Sequence[Hashable | ellipsis] | T_None,
    all_dims: tuple[Hashable, ...],
    *,
    check_exists: bool = True,
    replace_none: Literal[True] = True,
) -> tuple[Hashable, ...]:
    ...


@overload
def parse_ordered_dims(
    dim: str | Sequence[Hashable | ellipsis] | T_None,
    all_dims: tuple[Hashable, ...],
    *,
    check_exists: bool = True,
    replace_none: Literal[False],
) -> tuple[Hashable, ...] | T_None:
    ...


def parse_ordered_dims(
    dim: OrderedDims,
    all_dims: tuple[Hashable, ...],
    *,
    check_exists: bool = True,
    replace_none: bool = True,
) -> tuple[Hashable, ...] | None | ellipsis:
    """Parse one or more dimensions, expanding an ellipsis entry in place.

    A single dimension must always be a str, multiple dimensions can be
    Hashables (so e.g. a tuple can be used as a dimension). When the
    supplied sequence contains an ellipsis ("..."), that entry is replaced
    by every dimension of ``all_dims`` that is not named explicitly, in the
    order of ``all_dims``. This is only meaningful for ordered input, not
    for e.g. a set.

    Parameters
    ----------
    dim : str, Sequence of Hashable or "...", "..." or None
        Dimension(s) to parse. A "..." inside a Sequence is always
        replaced with all remaining dims.
    all_dims : tuple of Hashable
        All possible dimensions.
    check_exists : bool, default: True
        If True, check that dim is a subset of all_dims.
    replace_none : bool, default: True
        Forwarded to ``parse_dims`` when ``dim`` holds no ellipsis entry
        (i.e. also when ``dim`` itself is None or "...").

    Returns
    -------
    parsed_dims : tuple of Hashable
        Input dimensions as a tuple, or whatever ``parse_dims`` returns
        when ``dim`` holds no ellipsis entry.

    Raises
    ------
    ValueError
        If ``all_dims`` has repeated entries while an ellipsis entry is
        used, or if more than one ellipsis entry is supplied.
    """
    holds_ellipsis_entry = (
        dim is not None
        and dim is not ...
        and not isinstance(dim, str)
        and ... in dim
    )
    if not holds_ellipsis_entry:
        # Nothing to expand here: plain parsing covers every remaining case.
        return parse_dims(  # type: ignore[call-overload]
            dim=dim,
            all_dims=all_dims,
            check_exists=check_exists,
            replace_none=replace_none,
        )

    supplied: set[Hashable | ellipsis] = set(dim)
    known = set(all_dims)
    if check_exists:
        _check_dims(supplied, known)
    if len(known) != len(all_dims):
        raise ValueError("Cannot use ellipsis with repeated dims")

    ordered = tuple(dim)
    if ordered.count(...) > 1:
        raise ValueError("More than one ellipsis supplied")

    # Dimensions not named explicitly take the place of the ellipsis entry.
    remaining = tuple(d for d in all_dims if d not in supplied)
    position = ordered.index(...)
    head = ordered[:position]
    tail = ordered[position + 1 :]
    return head + remaining + tail


def _check_dims(dim: set[Hashable | ellipsis], all_dims: set[Hashable]) -> None:
wrong_dims = dim - all_dims
if wrong_dims and wrong_dims != {...}:
wrong_dims_str = ", ".join(f"'{d!s}'" for d in wrong_dims)
raise ValueError(
f"Dimension(s) {wrong_dims_str} do not exist. Expected one or more of {all_dims}"
)


_Accessor = TypeVar("_Accessor")


Expand Down
86 changes: 85 additions & 1 deletion xarray/tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

from datetime import datetime
from typing import Hashable
from typing import Hashable, Iterable, Sequence

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -294,6 +294,90 @@ def test_infix_dims_errors(supplied, all_):
list(utils.infix_dims(supplied, all_))


@pytest.mark.parametrize(
    ("dim", "expected"),
    [
        pytest.param("a", ("a",), id="str"),
        pytest.param(["a", "b"], ("a", "b"), id="list_of_str"),
        pytest.param(["a", 1], ("a", 1), id="list_mixed"),
        pytest.param(("a", "b"), ("a", "b"), id="tuple_of_str"),
        pytest.param(["a", ("b", "c")], ("a", ("b", "c")), id="list_with_tuple"),
        pytest.param((("b", "c"),), (("b", "c"),), id="tuple_of_tuple"),
        pytest.param(None, None, id="None"),
        pytest.param(..., ..., id="ellipsis"),
    ],
)
def test_parse_dims(
    dim: str | Iterable[Hashable] | None,
    expected: tuple[Hashable, ...],
) -> None:
    """parse_dims returns the input as a tuple.

    With ``replace_none=False`` the ``None`` and ``...`` inputs are expected
    to come back unchanged.
    """
    # The pool of known dimensions deliberately mixes several Hashable kinds.
    known_dims = ("a", "b", 1, ("b", "c"))
    assert utils.parse_dims(dim, known_dims, replace_none=False) == expected


def test_parse_dims_set() -> None:
    """A set input is accepted; only the membership of the result is checked."""
    # The pool of known dimensions deliberately mixes several Hashable kinds.
    known_dims = ("a", "b", 1, ("b", "c"))
    requested = {"a", 1}
    parsed = utils.parse_dims(requested, known_dims)
    # Compare as sets because a set input carries no order to compare against.
    assert set(parsed) == requested


@pytest.mark.parametrize(
    "dim",
    [
        pytest.param(None, id="None"),
        pytest.param(..., id="ellipsis"),
    ],
)
def test_parse_dims_replace_none(dim: None | ellipsis) -> None:
    """With ``replace_none=True`` both ``None`` and ``...`` yield all dims."""
    # The pool of known dimensions deliberately mixes several Hashable kinds.
    known_dims = ("a", "b", 1, ("b", "c"))
    assert utils.parse_dims(dim, known_dims, replace_none=True) == known_dims


@pytest.mark.parametrize(
    "dim",
    [
        pytest.param("x", id="str_missing"),
        pytest.param(["a", "x"], id="list_missing_one"),
        pytest.param(["x", 2], id="list_missing_all"),
    ],
)
def test_parse_dims_raises(dim: str | Iterable[Hashable]) -> None:
    """Unknown dimensions raise a ValueError naming the missing 'x'."""
    # The pool of known dimensions deliberately mixes several Hashable kinds.
    known_dims = ("a", "b", 1, ("b", "c"))
    with pytest.raises(ValueError, match="'x'"):
        utils.parse_dims(dim, known_dims, check_exists=True)


@pytest.mark.parametrize(
    ("dim", "expected"),
    [
        pytest.param("a", ("a",), id="str"),
        pytest.param(["a", "b"], ("a", "b"), id="list"),
        pytest.param([...], ("a", "b", "c"), id="list_only_ellipsis"),
        pytest.param(["a", ...], ("a", "b", "c"), id="list_with_ellipsis"),
        pytest.param(["a", ..., "b"], ("a", "c", "b"), id="list_with_middle_ellipsis"),
    ],
)
def test_parse_ordered_dims(
    dim: str | Sequence[Hashable | ellipsis],
    expected: tuple[Hashable, ...],
) -> None:
    """An ellipsis entry is replaced by the dims not named explicitly."""
    known_dims = ("a", "b", "c")
    assert utils.parse_ordered_dims(dim, known_dims) == expected


def test_parse_ordered_dims_raises() -> None:
    """Check the three ValueError messages parse_ordered_dims can produce."""
    known_dims = ("a", "b", "c")
    repeated_dims = known_dims + ("a",)

    # A dimension that is not among the known ones.
    with pytest.raises(ValueError, match="'x' do not exist"):
        utils.parse_ordered_dims("x", known_dims, check_exists=True)

    # An ellipsis entry combined with repeated entries in all_dims.
    with pytest.raises(ValueError, match="repeated dims"):
        utils.parse_ordered_dims(["a", ...], repeated_dims)

    # Two ellipsis entries in the supplied sequence.
    with pytest.raises(ValueError, match="More than one ellipsis"):
        utils.parse_ordered_dims(["a", ..., "b", ...], known_dims)


@pytest.mark.parametrize(
"nested_list, expected",
[
Expand Down