Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ def load(self):
"""
# access .data to coerce everything to numpy or dask arrays
all_data = dict((k, v.data) for k, v in self.variables.items())
lazy_data = OrderedDict((k, v) for k, v in all_data.items()
lazy_data = dict((k, v) for k, v in all_data.items()
if isinstance(v, dask_array_type))
if lazy_data:
import dask.array as da
Expand Down Expand Up @@ -417,8 +417,9 @@ def copy(self, deep=False):
"""Returns a copy of this dataset.

If `deep=True`, a deep copy is made of each of the component variables.
Otherwise, a shallow copy is made, so each variable in the new dataset
is also a variable in the original dataset.
Otherwise, a shallow copy of each of the component variables is made, so
that the underlying memory region of the new dataset is the same as in
the original dataset.
"""
variables = OrderedDict((k, v.copy(deep=deep))
for k, v in iteritems(self._variables))
Expand Down
6 changes: 2 additions & 4 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,10 +298,8 @@ def load(self):
Normally, it should not be necessary to call this method in user code,
because all xarray functions should either work on deferred data or
load data automatically.
"""
new_data = self._data_cached()
if isinstance(self._data, dask_array_type):
self._data = new_data
"""
self._data = self._data_cast()
return self

def compute(self):
Expand Down
40 changes: 9 additions & 31 deletions xarray/test/test_dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,44 +13,22 @@
import dask.array as da


def _copy_at_variable_level(arg):
"""We need to copy the argument at the level of xarray.Variable objects, so
that viewing its values does not trigger lazy loading.
"""
if isinstance(arg, Variable):
return arg.copy(deep=False)
elif isinstance(arg, DataArray):
ds = arg.to_dataset(name='__copied__')
return _copy_at_variable_level(ds)['__copied__']
elif isinstance(arg, Dataset):
ds = arg.copy()
for k in list(ds):
ds._variables[k] = ds._variables[k].copy(deep=False)
return ds
else:
assert False


class DaskTestCase(TestCase):
def assertLazyAnd(self, expected, actual, test):
expected_copy = _copy_at_variable_level(expected)
actual_copy = _copy_at_variable_level(actual)
expected.name = None
actual.name = None
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was there a failing test that required setting this? I would rather disable this on a case-by-case basis than across the board (not all objects passed to this method even have a name attribute).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, in several cases the test was failing because on one side the object automatically got its name from the underlying dask.name, while on the other side it was blank.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you simply make an assertLazyAndEqual for those cases, or set name='foo'? It should be pretty quick.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed and resolved conflict

with dask.set_options(get=dask.get):
test(actual_copy, expected_copy)
var = getattr(actual, 'variable', actual)
self.assertIsInstance(var.data, da.Array)
test(actual, expected)
if isinstance(actual, Dataset):
for var in actual.variables.values():
self.assertIsInstance(var.data, da.Array)
else:
var = getattr(actual, 'variable', actual)
self.assertIsInstance(var.data, da.Array)


@requires_dask
class TestVariable(DaskTestCase):
def assertLazyAnd(self, expected, actual, test):
expected_copy = expected.copy(deep=False)
actual_copy = actual.copy(deep=False)
with dask.set_options(get=dask.get):
test(actual_copy, expected_copy)
var = getattr(actual, 'variable', actual)
self.assertIsInstance(var.data, da.Array)

def assertLazyAndIdentical(self, expected, actual):
self.assertLazyAnd(expected, actual, self.assertVariableIdentical)

Expand Down