Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1644,6 +1644,7 @@ Exceptions
.. autosummary::
:toctree: generated/

AlignmentError
MergeError
SerializationWarning

Expand Down
3 changes: 2 additions & 1 deletion xarray/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
)
from xarray.core.variable import IndexVariable, Variable, as_variable
from xarray.namedarray.core import NamedArray
from xarray.structure.alignment import align, broadcast
from xarray.structure.alignment import AlignmentError, align, broadcast
from xarray.structure.chunks import unify_chunks
from xarray.structure.combine import combine_by_coords, combine_nested
from xarray.structure.concat import concat
Expand Down Expand Up @@ -128,6 +128,7 @@
"NamedArray",
"Variable",
# Exceptions
"AlignmentError",
"InvalidTreeError",
"MergeError",
"NotFoundInTreeError",
Expand Down
118 changes: 51 additions & 67 deletions xarray/structure/alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@
)


class AlignmentError(ValueError):
    """Error raised when objects cannot be aligned because of incompatible arguments.

    This is a subclass of :class:`ValueError`, so an ``except ValueError``
    clause also catches it.
    """


def reindex_variables(
variables: Mapping[Any, Variable],
dim_pos_indexers: Mapping[Any, Any],
Expand Down Expand Up @@ -196,7 +200,7 @@ def _normalize_indexes(
for k, idx in indexes.items():
if not isinstance(idx, Index):
if getattr(idx, "dims", (k,)) != (k,):
raise ValueError(
raise AlignmentError(
f"Indexer has dimensions {idx.dims} that are different "
f"from that to be indexed along '{k}'"
)
Expand Down Expand Up @@ -227,7 +231,7 @@ def _normalize_indexes(
elif exclude_dims:
excl_dims_str = ", ".join(str(d) for d in exclude_dims)
incl_dims_str = ", ".join(str(d) for d in all_dims - exclude_dims)
raise ValueError(
raise AlignmentError(
f"cannot exclude dimension(s) {excl_dims_str} from alignment because "
"these are used by an index together with non-excluded dimensions "
f"{incl_dims_str}"
Expand Down Expand Up @@ -268,7 +272,7 @@ def find_matching_indexes(self) -> None:
for dim_sizes in all_indexes_dim_sizes.values():
for dim, sizes in dim_sizes.items():
if len(sizes) > 1:
raise ValueError(
raise AlignmentError(
"cannot align objects with join='override' with matching indexes "
f"along dimension {dim!r} that don't have the same size"
)
Expand All @@ -283,47 +287,6 @@ def find_matching_unindexed_dims(self) -> None:

self.unindexed_dim_sizes = unindexed_dim_sizes

def assert_no_index_conflict(self) -> None:
    """Check that coordinate and dimension names are unique across all sets
    of matching indexes.

    Every index used for re-indexing or alignment must be fully compatible
    with the others and must not conflict with them.

    Note: perhaps we could choose less restrictive constraints and instead
    check for conflicts among the dimension (position) indexers returned by
    `Index.reindex_like()` for each matching pair of object index / aligned
    index?
    (ref: https://github.com/pydata/xarray/issues/1603#issuecomment-442965602)

    Raises
    ------
    ValueError
        If a coordinate name or a dimension name appears in more than one
        set of matching indexes.
    """
    # De-duplicate the keys through a dict instead of a set so that they are
    # visited in insertion order; this keeps the order of the names listed in
    # the error message below stable from run to run.
    matching_keys = dict.fromkeys([*self.all_indexes, *self.indexes])

    coord_count: dict[Hashable, int] = defaultdict(int)
    dim_count: dict[Hashable, int] = defaultdict(int)
    for coord_names_dims, _ in matching_keys:
        # A dimension counts once per key even when several coordinates of
        # that key share it (insertion-ordered stand-in for a set).
        dims_seen: dict[Hashable, None] = {}
        for name, dims in coord_names_dims:
            coord_count[name] += 1
            dims_seen.update(dict.fromkeys(dims))
        for dim in dims_seen:
            dim_count[dim] += 1

    # Coordinates are checked before dimensions, so a coordinate conflict is
    # the one reported when both kinds are present.
    for count, msg in [(coord_count, "coordinates"), (dim_count, "dimensions")]:
        dup = {k: v for k, v in count.items() if v > 1}
        if dup:
            items_msg = ", ".join(
                f"{k!r} ({v} conflicting indexes)" for k, v in dup.items()
            )
            raise ValueError(
                "cannot re-index or align objects with conflicting indexes found for "
                f"the following {msg}: {items_msg}\n"
                "Conflicting indexes may occur when\n"
                "- they relate to different sets of coordinate and/or dimension names\n"
                "- they don't have the same type\n"
                "- they may be used to reindex data along common dimensions"
            )

def _need_reindex(self, dim, cmp_indexes) -> bool:
"""Whether or not we need to reindex variables for a set of
matching indexes.
Expand Down Expand Up @@ -383,11 +346,33 @@ def _get_index_joiner(self, index_cls) -> Callable:
def align_indexes(self) -> None:
"""Compute all aligned indexes and their corresponding coordinate variables."""

aligned_indexes = {}
aligned_index_vars = {}
reindex = {}
new_indexes = {}
new_index_vars = {}
aligned_indexes: dict[MatchingIndexKey, Index] = {}
aligned_index_vars: dict[MatchingIndexKey, dict[Hashable, Variable]] = {}
reindex: dict[MatchingIndexKey, bool] = {}
new_indexes: dict[Hashable, Index] = {}
new_index_vars: dict[Hashable, Variable] = {}

def update_dicts(
    key: MatchingIndexKey,
    idx: Index,
    idx_vars: dict[Hashable, Variable],
    need_reindex: bool,
) -> None:
    """Record one aligned index together with its coordinate variables.

    Stores ``need_reindex``, ``idx`` and ``idx_vars`` under ``key`` in the
    enclosing ``reindex``, ``aligned_indexes`` and ``aligned_index_vars``
    dicts, then registers every coordinate of ``idx_vars`` in
    ``new_indexes`` and ``new_index_vars``.

    Raises
    ------
    AlignmentError
        If a coordinate name in ``idx_vars`` was already registered in
        ``new_indexes`` by an earlier call.
    """
    reindex[key] = need_reindex
    aligned_indexes[key] = idx
    aligned_index_vars[key] = idx_vars

    for name, var in idx_vars.items():
        # Each coordinate name may be registered only once; a repeat means
        # two indexes claim the same coordinate.
        if name in new_indexes:
            other_idx = new_indexes[name]
            other_var = new_index_vars[name]
            raise AlignmentError(
                f"cannot align objects on coordinate {name!r} because of conflicting indexes\n"
                f"first index: {idx!r}\nsecond index: {other_idx!r}\n"
                f"first variable: {var!r}\nsecond variable: {other_var!r}\n"
            )
        new_indexes[name] = idx
        new_index_vars[name] = var

for key, matching_indexes in self.all_indexes.items():
matching_index_vars = self.all_index_vars[key]
Expand Down Expand Up @@ -419,7 +404,7 @@ def align_indexes(self) -> None:
need_reindex = False
if need_reindex:
if self.join == "exact":
raise ValueError(
raise AlignmentError(
"cannot align objects with join='exact' where "
"index/labels/sizes are not equal along "
"these coordinates (dimensions): "
Expand All @@ -437,25 +422,14 @@ def align_indexes(self) -> None:
joined_index = matching_indexes[0]
joined_index_vars = matching_index_vars[0]

reindex[key] = need_reindex
aligned_indexes[key] = joined_index
aligned_index_vars[key] = joined_index_vars

for name, var in joined_index_vars.items():
new_indexes[name] = joined_index
new_index_vars[name] = var
update_dicts(key, joined_index, joined_index_vars, need_reindex)

# Explicitly provided indexes that are not found in objects to align
# may relate to unindexed dimensions so we add them too
for key, idx in self.indexes.items():
if key not in aligned_indexes:
index_vars = self.index_vars[key]
reindex[key] = False
aligned_indexes[key] = idx
aligned_index_vars[key] = index_vars
for name, var in index_vars.items():
new_indexes[name] = idx
new_index_vars[name] = var
update_dicts(key, idx, index_vars, False)

self.aligned_indexes = aligned_indexes
self.aligned_index_vars = aligned_index_vars
Expand All @@ -474,7 +448,7 @@ def assert_unindexed_dim_sizes_equal(self) -> None:
else:
add_err_msg = ""
if len(sizes) > 1:
raise ValueError(
raise AlignmentError(
f"cannot reindex or align along dimension {dim!r} "
f"because of conflicting dimension sizes: {sizes!r}" + add_err_msg
)
Expand Down Expand Up @@ -503,13 +477,24 @@ def _get_dim_pos_indexers(
matching_indexes: dict[MatchingIndexKey, Index],
) -> dict[Hashable, Any]:
dim_pos_indexers = {}
dim_index = {}

for key, aligned_idx in self.aligned_indexes.items():
obj_idx = matching_indexes.get(key)
if obj_idx is not None:
if self.reindex[key]:
indexers = obj_idx.reindex_like(aligned_idx, **self.reindex_kwargs)
dim_pos_indexers.update(indexers)
for dim, idxer in indexers.items():
if dim in dim_pos_indexers and not np.array_equal(
idxer, dim_pos_indexers[dim]
):
raise AlignmentError(
f"cannot reindex or align along dimension {dim!r} because "
"of conflicting re-indexers returned by multiple indexes\n"
f"first index: {obj_idx!r}\nsecond index: {dim_index[dim]!r}\n"
)
dim_pos_indexers[dim] = idxer
dim_index[dim] = obj_idx

return dim_pos_indexers

Expand Down Expand Up @@ -571,7 +556,6 @@ def align(self) -> None:

self.find_matching_indexes()
self.find_matching_unindexed_dims()
self.assert_no_index_conflict()
self.align_indexes()
self.assert_unindexed_dim_sizes_equal()

Expand Down Expand Up @@ -735,7 +719,7 @@ def align(

Raises
------
ValueError
AlignmentError
If any dimensions without labels on the arguments have different sizes,
or a different size than the size of the aligned dimension labels.

Expand Down
23 changes: 23 additions & 0 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

import xarray as xr
from xarray import (
AlignmentError,
DataArray,
Dataset,
IndexVariable,
Expand Down Expand Up @@ -2543,6 +2544,28 @@ def test_align_indexes(self) -> None:

assert_identical(expected_x2, x2)

def test_align_multiple_indexes_common_dim(self) -> None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice test!

a = Dataset(coords={"x": [1, 2], "xb": ("x", [3, 4])}).set_xindex("xb")
b = Dataset(coords={"x": [1], "xb": ("x", [3])}).set_xindex("xb")

(a2, b2) = align(a, b, join="inner")
assert_identical(a2, b, check_default_indexes=False)
assert_identical(b2, b, check_default_indexes=False)

c = Dataset(coords={"x": [1, 3], "xb": ("x", [2, 4])}).set_xindex("xb")

with pytest.raises(AlignmentError, match=".*conflicting re-indexers"):
align(a, c)

def test_align_conflicting_indexes(self) -> None:
    # A trivial PandasIndex subclass: it adds no behavior and differs from
    # its parent only by type.
    class CustomIndex(PandasIndex): ...

    # Both datasets index the same coordinate "xb" along "x"; `a` uses the
    # index that set_xindex builds when no class is given, `b` uses
    # CustomIndex.
    a = Dataset(coords={"xb": ("x", [3, 4])}).set_xindex("xb")
    b = Dataset(coords={"xb": ("x", [3])}).set_xindex("xb", CustomIndex)

    # Aligning them must fail with the dedicated alignment exception.
    with pytest.raises(AlignmentError, match="cannot align.*conflicting indexes"):
        align(a, b)

def test_align_non_unique(self) -> None:
x = Dataset({"foo": ("x", [3, 4, 5]), "x": [0, 0, 1]})
x1, x2 = align(x, x)
Expand Down
Loading