Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 29 additions & 13 deletions xarray/core/formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@
import pandas as pd
from pandas.errors import OutOfBoundsDatetime

from xarray.core.duck_array_ops import array_equiv
from xarray.core.indexing import ExplicitlyIndexed, MemoryCachedArray
from xarray.core.duck_array_ops import array_equiv, astype
from xarray.core.indexing import MemoryCachedArray
from xarray.core.options import OPTIONS, _get_boolean_with_default
from xarray.core.pycompat import array_type
from xarray.core.pycompat import array_type, to_duck_array, to_numpy
from xarray.core.utils import is_duck_array

if TYPE_CHECKING:
Expand Down Expand Up @@ -68,6 +68,8 @@ def first_n_items(array, n_desired):
# might not be a numpy.ndarray. Moreover, access to elements of the array
# could be very expensive (e.g. if it's only available over DAP), so go out
# of our way to get them in a single call to __getitem__ using only slices.
from xarray.core.variable import Variable

if n_desired < 1:
raise ValueError("must request at least one item")

Expand All @@ -78,7 +80,14 @@ def first_n_items(array, n_desired):
if n_desired < array.size:
indexer = _get_indexer_at_least_n_items(array.shape, n_desired, from_end=False)
array = array[indexer]
return np.asarray(array).flat[:n_desired]

# We pass variable objects in to handle indexing
# with indexer above. It would not work with our
# lazy indexing classes at the moment, so we cannot
# pass Variable._data
if isinstance(array, Variable):
array = array._data
return np.ravel(to_duck_array(array))[:n_desired]


def last_n_items(array, n_desired):
Expand All @@ -87,13 +96,22 @@ def last_n_items(array, n_desired):
# might not be a numpy.ndarray. Moreover, access to elements of the array
# could be very expensive (e.g. if it's only available over DAP), so go out
# of our way to get them in a single call to __getitem__ using only slices.
from xarray.core.variable import Variable

if (n_desired == 0) or (array.size == 0):
return []

if n_desired < array.size:
indexer = _get_indexer_at_least_n_items(array.shape, n_desired, from_end=True)
array = array[indexer]
return np.asarray(array).flat[-n_desired:]

# We pass variable objects in to handle indexing
# with indexer above. It would not work with our
# lazy indexing classes at the moment, so we cannot
# pass Variable._data
if isinstance(array, Variable):
array = array._data
return np.ravel(to_duck_array(array))[-n_desired:]


def last_item(array):
Expand All @@ -103,7 +121,8 @@ def last_item(array):
return []

indexer = (slice(-1, None),) * array.ndim
return np.ravel(np.asarray(array[indexer])).tolist()
# to_numpy since dask doesn't support tolist
return np.ravel(to_numpy(array[indexer])).tolist()


def calc_max_rows_first(max_rows: int) -> int:
Expand Down Expand Up @@ -171,10 +190,10 @@ def format_item(x, timedelta_format=None, quote_strings=True):

def format_items(x):
"""Returns a succinct summaries of all items in a sequence as strings"""
x = np.asarray(x)
x = to_duck_array(x)
timedelta_format = "datetime"
if np.issubdtype(x.dtype, np.timedelta64):
x = np.asarray(x, dtype="timedelta64[ns]")
x = astype(x, dtype="timedelta64[ns]")
day_part = x[~pd.isnull(x)].astype("timedelta64[D]").astype("timedelta64[ns]")
time_needed = x[~pd.isnull(x)] != day_part
day_needed = day_part != np.timedelta64(0, "ns")
Expand Down Expand Up @@ -584,12 +603,9 @@ def limit_lines(string: str, *, limit: int):
def short_array_repr(array):
from xarray.core.common import AbstractArray

if isinstance(array, ExplicitlyIndexed):
array = array.get_duck_array()
elif isinstance(array, AbstractArray):
if isinstance(array, AbstractArray):
array = array.data
if not is_duck_array(array):
array = np.asarray(array)
array = to_duck_array(array)

# default to lower precision so a full (abbreviated) line can fit on
# one line with the default display_width
Expand Down
34 changes: 34 additions & 0 deletions xarray/core/pycompat.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,37 @@ def is_chunked_array(x) -> bool:

def is_0d_dask_array(x):
    """Return whether ``x`` is a zero-dimensional (scalar) dask array.

    True only when ``x`` is both a duck dask array and scalar-like.
    """
    if not is_duck_dask_array(x):
        return False
    return is_scalar(x)


def to_numpy(data) -> np.ndarray:
    """Coerce ``data`` to an eager ``numpy.ndarray``.

    Unwraps xarray's lazy indexing adapters, computes chunked (e.g. dask)
    arrays, and converts known duck-array containers (cupy, pint, sparse)
    to their dense numpy equivalents before the final ``np.asarray`` call.

    Parameters
    ----------
    data : array-like
        Any array-like object, possibly wrapped or chunked.

    Returns
    -------
    numpy.ndarray
    """
    from xarray.core.indexing import ExplicitlyIndexed
    from xarray.core.parallelcompat import get_chunked_array_type

    if isinstance(data, ExplicitlyIndexed):
        data = data.get_duck_array()

    # TODO first attempt to call .to_numpy() once some libraries implement it
    if hasattr(data, "chunks"):
        chunkmanager = get_chunked_array_type(data)
        data, *_ = chunkmanager.compute(data)

    # Convert known duck-array containers to dense numpy-compatible data.
    if isinstance(data, array_type("cupy")):
        data = data.get()
    # pint has to be imported dynamically as pint imports xarray
    if isinstance(data, array_type("pint")):
        data = data.magnitude
    if isinstance(data, array_type("sparse")):
        data = data.todense()

    return np.asarray(data)


def to_duck_array(data):
    """Coerce ``data`` to a duck array, without forcing a numpy conversion.

    Lazy ``ExplicitlyIndexed`` wrappers are unwrapped via
    ``get_duck_array()``; anything already satisfying the duck-array
    interface is returned unchanged; everything else falls back to
    ``np.asarray``.

    Parameters
    ----------
    data : object
        Array-like or wrapped array.

    Returns
    -------
    duck array
        The underlying duck array (numpy, dask, cupy, pint, sparse, ...).
    """
    from xarray.core.indexing import ExplicitlyIndexed

    if isinstance(data, ExplicitlyIndexed):
        return data.get_duck_array()
    elif is_duck_array(data):
        return data
    else:
        return np.asarray(data)
19 changes: 2 additions & 17 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@
from xarray.core.options import OPTIONS, _get_keep_attrs
from xarray.core.parallelcompat import get_chunked_array_type, guess_chunkmanager
from xarray.core.pycompat import (
array_type,
integer_types,
is_0d_dask_array,
is_chunked_array,
is_duck_dask_array,
to_numpy,
)
from xarray.core.utils import (
OrderedSet,
Expand Down Expand Up @@ -1072,22 +1072,7 @@ def chunk(
def to_numpy(self) -> np.ndarray:
    """Coerces wrapped data to numpy and returns a numpy.ndarray.

    Delegates the actual coercion (chunked compute, cupy/pint/sparse
    unwrapping) to ``xarray.core.pycompat.to_numpy``. We pass ``_data``
    rather than ``data`` so lazy indexing wrappers are handled there.
    """
    # TODO an entrypoint so array libraries can choose coercion method?
    return to_numpy(self._data)

def as_numpy(self) -> Self:
"""Coerces wrapped data into a numpy array, returning a Variable."""
Expand Down