support chunks in open_groups and open_datatree #9660
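This PR threads `chunks` (and the other `open_dataset`-style decoding options) through `open_datatree` and `open_groups`. A minimal sketch of the usage it enables, assuming a hypothetical netCDF file `example.nc` containing nested groups:

```python
import xarray as xr

# With this change, open_datatree/open_groups accept the same chunking and
# decoding keywords as open_dataset, so variables come back as lazy chunked
# (e.g. dask) arrays instead of eagerly loaded numpy arrays.
tree = xr.open_datatree("example.nc", chunks={"time": 100})
groups = xr.open_groups("example.nc", chunks={"time": 100})
```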
Changes from 4 commits
```diff
@@ -41,7 +41,9 @@
 )
 from xarray.core.dataarray import DataArray
 from xarray.core.dataset import Dataset, _get_chunk, _maybe_chunk
+from xarray.core.datatree import DataTree
 from xarray.core.indexes import Index
+from xarray.core.treenode import group_subtrees
 from xarray.core.types import NetcdfWriteModes, ZarrWriteModes
 from xarray.core.utils import is_remote_uri
 from xarray.namedarray.daskmanager import DaskManager
```
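`DataTree` and `group_subtrees` become runtime imports here because the new `_datatree_from_backend_datatree` helper below calls `DataTree.from_dict` and `group_subtrees` at runtime; the next hunk correspondingly drops what appears to be the type-checking-only `DataTree` import.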
```diff
@@ -74,7 +76,6 @@
     T_NetcdfTypes = Literal[
         "NETCDF4", "NETCDF4_CLASSIC", "NETCDF3_64BIT", "NETCDF3_CLASSIC"
     ]
-    from xarray.core.datatree import DataTree


 DATAARRAY_NAME = "__xarray_dataarray_name__"
 DATAARRAY_VARIABLE = "__xarray_dataarray_variable__"
```
```diff
@@ -414,6 +415,56 @@ def _dataset_from_backend_dataset(
     return ds


+def _datatree_from_backend_datatree(
+    backend_tree,
+    filename_or_obj,
+    engine,
+    chunks,
+    cache,
+    overwrite_encoded_chunks,
+    inline_array,
+    chunked_array_type,
+    from_array_kwargs,
+    **extra_tokens,
+):
+    if not isinstance(chunks, int | dict) and chunks not in {None, "auto"}:
+        raise ValueError(
+            f"chunks must be an int, dict, 'auto', or None. Instead found {chunks}."
+        )
+
+    # _protect_datatree_variables_inplace(backend_tree, cache)
+    if chunks is None:
+        tree = backend_tree
+    else:
+        tree = DataTree.from_dict(
+            {
+                path: _chunk_ds(
+                    node.dataset,
+                    filename_or_obj,
+                    engine,
+                    chunks,
+                    overwrite_encoded_chunks,
+                    inline_array,
+                    chunked_array_type,
+                    from_array_kwargs,
+                    **extra_tokens,
+                )
+                for path, [node] in group_subtrees(backend_tree)
+            }
+        )
+
+    # ds.set_close(backend_ds._close)
+
+    # Ensure source filename always stored in dataset object
+    if "source" not in tree.encoding:
+        path = getattr(filename_or_obj, "path", filename_or_obj)
+        if isinstance(path, str | os.PathLike):
+            tree.encoding["source"] = _normalize_path(path)
+
+    return tree
+
+
 def open_dataset(
     filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
     *,
```
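For context, a standalone sketch of the traversal pattern the helper relies on: `group_subtrees` yields `(path, nodes)` pairs where `nodes` is a 1-tuple when given a single tree (hence the `[node]` unpacking), and `DataTree.from_dict` reassembles a tree from a `{path: Dataset}` mapping. This sketch assumes a recent xarray with `DataTree` support and substitutes a plain `.chunk()` call for the internal `_chunk_ds` helper:

```python
import xarray as xr
from xarray.core.treenode import group_subtrees

tree = xr.DataTree.from_dict(
    {
        "/": xr.Dataset({"a": ("x", [1.0, 2.0])}),
        "/child": xr.Dataset({"b": ("x", [3.0, 4.0])}),
    }
)

# Rebuild the tree node by node, transforming each node's dataset -- the
# same comprehension shape _datatree_from_backend_datatree uses above.
rechunked = xr.DataTree.from_dict(
    {
        path: node.dataset.chunk({"x": 1})
        for path, [node] in group_subtrees(tree)
    }
)
```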
```diff
@@ -838,7 +889,22 @@ def open_dataarray(
 def open_datatree(
     filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
+    *,
     engine: T_Engine = None,
+    chunks: T_Chunks = None,
+    cache: bool | None = None,
+    decode_cf: bool | None = None,
+    mask_and_scale: bool | Mapping[str, bool] | None = None,
+    decode_times: bool | Mapping[str, bool] | None = None,
+    decode_timedelta: bool | Mapping[str, bool] | None = None,
+    use_cftime: bool | Mapping[str, bool] | None = None,
+    concat_characters: bool | Mapping[str, bool] | None = None,
+    decode_coords: Literal["coordinates", "all"] | bool | None = None,
+    drop_variables: str | Iterable[str] | None = None,
+    inline_array: bool = False,
+    chunked_array_type: str | None = None,
+    from_array_kwargs: dict[str, Any] | None = None,
+    backend_kwargs: dict[str, Any] | None = None,
     **kwargs,
 ) -> DataTree:
     """
```
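Note the bare `*` after `filename_or_obj`: every new option is keyword-only, matching `open_dataset`. A small sketch of the calling convention this enforces (file name hypothetical):

```python
import xarray as xr

# OK: options are passed by name.
tree = xr.open_datatree("example.nc", engine="netcdf4", chunks={"time": 10})

# TypeError: the bare "*" rejects positional use of these options.
# tree = xr.open_datatree("example.nc", "netcdf4", {"time": 10})
```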
```diff
@@ -856,17 +922,75 @@ def open_datatree(
     -------
     xarray.DataTree
     """
+    if cache is None:
+        cache = chunks is None
+
+    if backend_kwargs is not None:
+        kwargs.update(backend_kwargs)
+
     if engine is None:
         engine = plugins.guess_engine(filename_or_obj)

+    if from_array_kwargs is None:
+        from_array_kwargs = {}
+
     backend = plugins.get_backend(engine)

-    return backend.open_datatree(filename_or_obj, **kwargs)
+    decoders = _resolve_decoders_kwargs(
+        decode_cf,
+        open_backend_dataset_parameters=(),
+        mask_and_scale=mask_and_scale,
+        decode_times=decode_times,
+        decode_timedelta=decode_timedelta,
+        concat_characters=concat_characters,
+        use_cftime=use_cftime,
+        decode_coords=decode_coords,
+    )
+    overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
+
+    backend_tree = backend.open_datatree(
+        filename_or_obj,
+        drop_variables=drop_variables,
+        **decoders,
+        **kwargs,
+    )
+
+    tree = _datatree_from_backend_datatree(
+        backend_tree,
+        filename_or_obj,
+        engine,
+        chunks,
+        cache,
+        overwrite_encoded_chunks,
+        inline_array,
+        chunked_array_type,
+        from_array_kwargs,
+        drop_variables=drop_variables,
+        **decoders,
+        **kwargs,
+    )
+
+    return tree


 def open_groups(
     filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
+    *,
     engine: T_Engine = None,
+    chunks: T_Chunks = None,
+    cache: bool | None = None,
+    decode_cf: bool | None = None,
+    mask_and_scale: bool | Mapping[str, bool] | None = None,
+    decode_times: bool | Mapping[str, bool] | None = None,
+    decode_timedelta: bool | Mapping[str, bool] | None = None,
+    use_cftime: bool | Mapping[str, bool] | None = None,
+    concat_characters: bool | Mapping[str, bool] | None = None,
+    decode_coords: Literal["coordinates", "all"] | bool | None = None,
+    drop_variables: str | Iterable[str] | None = None,
+    inline_array: bool = False,
+    chunked_array_type: str | None = None,
+    from_array_kwargs: dict[str, Any] | None = None,
+    backend_kwargs: dict[str, Any] | None = None,
     **kwargs,
 ) -> dict[str, Dataset]:
     """
```
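The rewritten body mirrors `open_dataset`'s pipeline: resolve the decoder kwargs once, ask the backend for an undecoded tree, then chunk it via the new helper. One behavioral detail worth calling out: `cache` defaults to `chunks is None`, so an unchunked open caches data in memory on first access while any chunked open stays lazy unless overridden. A hedged sketch (file name hypothetical):

```python
import xarray as xr

# Default: chunks is None, so cache resolves to True (eager, cached numpy).
eager = xr.open_datatree("example.nc")

# Any chunking request flips cache to False (lazy chunked arrays)...
lazy = xr.open_datatree("example.nc", chunks={})

# ...unless the caller explicitly re-enables caching.
lazy_cached = xr.open_datatree("example.nc", chunks={}, cache=True)
```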
```diff
@@ -893,12 +1017,58 @@ def open_groups(
     open_datatree()
     DataTree.from_dict()
     """
+    if cache is None:
+        cache = chunks is None
+
+    if backend_kwargs is not None:
+        kwargs.update(backend_kwargs)
+
     if engine is None:
         engine = plugins.guess_engine(filename_or_obj)

+    if from_array_kwargs is None:
+        from_array_kwargs = {}
+
     backend = plugins.get_backend(engine)

-    return backend.open_groups_as_dict(filename_or_obj, **kwargs)
+    decoders = _resolve_decoders_kwargs(
+        decode_cf,
+        open_backend_dataset_parameters=(),
+        mask_and_scale=mask_and_scale,
+        decode_times=decode_times,
+        decode_timedelta=decode_timedelta,
+        concat_characters=concat_characters,
+        use_cftime=use_cftime,
+        decode_coords=decode_coords,
+    )
+    overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
+
+    backend_groups = backend.open_groups_as_dict(
+        filename_or_obj,
+        drop_variables=drop_variables,
+        **decoders,
+        **kwargs,
+    )
+
+    groups = {
+        name: _dataset_from_backend_dataset(
+            backend_ds,
+            filename_or_obj,
+            engine,
+            chunks,
+            cache,
+            overwrite_encoded_chunks,
+            inline_array,
+            chunked_array_type,
+            from_array_kwargs,
+            drop_variables=drop_variables,
+            **decoders,
+            **kwargs,
+        )
+        for name, backend_ds in backend_groups.items()
+    }
+
+    return groups


 def open_mfdataset(
```
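As the `See Also` entries suggest, `open_groups` composes with `DataTree.from_dict` when the group names are valid tree paths; with this PR, the chunking survives the round trip. A hedged sketch (file name hypothetical):

```python
import xarray as xr

# Open each group as its own (lazily chunked) Dataset...
groups = xr.open_groups("example.nc", chunks={"time": 100})

# ...then assemble the same DataTree that open_datatree would return.
tree = xr.DataTree.from_dict(groups)
```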