-
-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Extend padding functionalities #9353
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
7998afb
44e7d26
fd3e304
9f66ee2
2f33b40
b70b25d
d939030
48a972f
098706b
afda62d
8bdb6f1
992e20e
d3f0275
0a51b7c
3d40800
a848351
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9147,7 +9147,12 @@ def pad( | |
| int | tuple[int, int] | Mapping[Any, tuple[int, int]] | None | ||
| ) = None, | ||
| constant_values: ( | ||
| float | tuple[float, float] | Mapping[Any, tuple[float, float]] | None | ||
| float | ||
| | tuple[float, float] | ||
| | Mapping[ | ||
| Any, float | tuple[float, float] | Mapping[Any, tuple[float, float]] | ||
| ] | ||
| | None | ||
| ) = None, | ||
| end_values: int | tuple[int, int] | Mapping[Any, tuple[int, int]] | None = None, | ||
| reflect_type: PadReflectOptions = None, | ||
|
|
@@ -9204,9 +9209,11 @@ def pad( | |
| (stat_length,) or int is a shortcut for before = after = statistic | ||
| length for all axes. | ||
| Default is ``None``, to use the entire axis. | ||
| constant_values : scalar, tuple or mapping of hashable to tuple, default: 0 | ||
| Used in 'constant'. The values to set the padded values for each | ||
| axis. | ||
| constant_values : scalar, tuple, mapping of hashable to tuple or | ||
| mapping of hashable to mapping of hashable to tuple, default: 0 | ||
|
||
| Used in 'constant'. The values to set the padded values for each data variable / axis. | ||
| ``{var_1: {dim_1: (before_1, after_1), ... dim_N: (before_N, after_N)}, ... | ||
| var_M: (before, after)}`` unique pad constants per data variable. | ||
| ``{dim_1: (before_1, after_1), ... dim_N: (before_N, after_N)}`` unique | ||
| pad constants along each dimension. | ||
| ``((before, after),)`` yields same before and after constants for each | ||
|
|
@@ -9292,6 +9299,12 @@ def pad( | |
| if not pad_dims.intersection(xindexes.get_all_dims(k)): | ||
| indexes[k] = idx | ||
|
|
||
| per_data_var_constant_values = {} | ||
| if isinstance(constant_values, Mapping): | ||
| for k in self.data_vars: | ||
| if v := constant_values.pop(k, None): | ||
| per_data_var_constant_values[k] = v | ||
|
|
||
| for name, var in self.variables.items(): | ||
| var_pad_width = {k: v for k, v in pad_width.items() if k in var.dims} | ||
| if not var_pad_width: | ||
|
|
@@ -9301,7 +9314,9 @@ def pad( | |
| pad_width=var_pad_width, | ||
| mode=mode, | ||
| stat_length=stat_length, | ||
| constant_values=constant_values, | ||
| constant_values=per_data_var_constant_values.get( | ||
| name, constant_values | ||
| ), | ||
| end_values=end_values, | ||
| reflect_type=reflect_type, | ||
| keep_attrs=keep_attrs, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6689,17 +6689,41 @@ def test_polyfit_warnings(self) -> None: | |
| ds.var1.polyfit("dim2", 10, full=True) | ||
| assert len(ws) == 1 | ||
|
|
||
| def test_pad(self) -> None: | ||
| @pytest.mark.parametrize( | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Does this work if you want to pad along a dimension coordinate (i.e. a variable that has the same name as its dimension)?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good question, I'll investigate :D
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So, as I understand it, most dims in the test dataset are dimension coordinates and they pad correctly, so I believe it does work. In any case, I've extended the tests to pad all dimensions just to be sure nothing is behaving incorrectly. Let me know if I missed any cases. |
||
| ["constant_values", "expected"], | ||
| [ | ||
| pytest.param(42, {"var1": 42}, id="numeric"), | ||
| pytest.param((42, 43), {"var1": (42, 43)}, id="tuple"), | ||
| pytest.param( | ||
| {"dim2": (42, 43)}, {"var1": (42, 43), "var2": (42, 43)}, id="per dim" | ||
| ), | ||
| pytest.param( | ||
| {"var1": 42, "var2": (42, 43)}, | ||
| {"var1": 42, "var2": (42, 43)}, | ||
| id="per var", | ||
| ), | ||
| pytest.param( | ||
| {"var1": 42, "dim2": (42, 43)}, | ||
| {"var1": 42, "var2": (42, 43)}, | ||
| id="mixed", | ||
| ), | ||
| ], | ||
| ) | ||
| def test_pad(self, constant_values, expected) -> None: | ||
| ds = create_test_data(seed=1) | ||
| padded = ds.pad(dim2=(1, 1), constant_values=42) | ||
| padded = ds.pad(dim2=(1, 1), constant_values=constant_values) | ||
|
|
||
| assert padded["dim2"].shape == (11,) | ||
| assert padded["var1"].shape == (8, 11) | ||
| assert padded["var2"].shape == (8, 11) | ||
| assert padded["var3"].shape == (10, 8) | ||
| assert dict(padded.sizes) == {"dim1": 8, "dim2": 11, "dim3": 10, "time": 20} | ||
|
|
||
| np.testing.assert_equal(padded["var1"].isel(dim2=[0, -1]).data, 42) | ||
| for var, expected_value in expected.items(): | ||
| np.testing.assert_equal( | ||
| np.unique(padded[var].isel(dim2=[0, -1]).data), expected_value | ||
| ) | ||
| # np.testing.assert_equal(padded["var1"].isel(dim2=[0, -1]).data, 42) | ||
| np.testing.assert_equal(padded["dim2"][[0, -1]].data, np.nan) | ||
|
|
||
| @pytest.mark.parametrize( | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a pretty gnarly type signature. Same with the docstring. But I don't think there's much we can do to simplify it.
We could possibly make
`float | tuple[float, float] | Mapping[Any, tuple[float, float]]` into an alias?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah that's true, I'll do that.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So, I changed it slightly. I created these aliases:
This way it is a bit more organized. I also changed the implementation of
`pad` for variables so that it can make use of `Mapping[Any, float]`, to keep it consistent with the `pad` function for datasets. What do you think?
(Also, let me know if the location in
`types.py` and the import in `dataset.py` are correct. I don't fully understand when to import types under the `if TYPE_CHECKING:` block versus above it in `dataset.py`.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks good, thanks!
(others may have refinements...)