-
Notifications
You must be signed in to change notification settings - Fork 121
feat: RepeatingBasisFunction.inverse_transform
#687
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -65,6 +65,10 @@ def __init__(self, column=0, remainder="drop", n_periods=12, input_range=None, w | |
| self.n_periods = n_periods | ||
| self.input_range = input_range | ||
| self.width = width | ||
|
|
||
| def get_feature_names_out(self, input_features=None): | ||
| feature_names = self.pipeline_.get_feature_names_out() | ||
| return feature_names | ||
|
|
||
| def fit(self, X, y=None): | ||
| """Fit `RepeatingBasisFunction` transformer on input data `X`. | ||
|
|
@@ -82,6 +86,7 @@ def fit(self, X, y=None): | |
| self : RepeatingBasisFunction | ||
| The fitted transformer. | ||
| """ | ||
|
|
||
| self.pipeline_ = ColumnTransformer( | ||
| [ | ||
| ( | ||
|
|
@@ -95,6 +100,7 @@ def fit(self, X, y=None): | |
| ) | ||
| ], | ||
| remainder=self.remainder, | ||
| verbose_feature_names_out = False | ||
| ) | ||
|
|
||
| self.pipeline_.fit(X, y) | ||
|
|
@@ -116,6 +122,31 @@ def transform(self, X): | |
| """ | ||
| check_is_fitted(self, ["pipeline_"]) | ||
| return self.pipeline_.transform(X) | ||
|
|
||
| def inverse_transform(self, X): | ||
| """Transform RBF features back to the input range. Outputs a numpy array. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| X : 2D array-like of shape (n_samples, n_features) | ||
| Can be either a pandas.DataFrame or a numpy array. | ||
| The RBF columns to transform back must appear first. | ||
|
|
||
| Returns | ||
| ------- | ||
| X_transformed : array-like with the reconstruction of the original values | ||
| in input_range in the first column. Will be of the same type as X. | ||
| """ | ||
|
|
||
| check_is_fitted(self, ["pipeline_"]) | ||
|
|
||
| if isinstance(X,np.ndarray): | ||
| Xarr = check_array(X[:,:self.n_periods], estimator=self, ensure_2d=True) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would be more comfortable with something along the following lines: if line 144 becomes `Xarr = check_array(X, estimator=self, ensure_2d=True, ensure_min_features=self.n_periods)[:, :self.n_periods]`, it would also convert (some) dataframe-like objects to arrays, and it may even be possible to avoid checking for an array instance. |
||
| new_x = self.pipeline_.named_transformers_['repeatingbasis'].inverse_transform(Xarr) | ||
| Xarr = np.hstack((new_x.reshape(-1, 1),X[:,self.n_periods:])) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why are we returning the input as well? This is somewhat related to the comment in the tests. |
||
| return Xarr | ||
| else: | ||
| raise TypeError("X must be a numpy array.") | ||
|
|
||
|
|
||
| class _RepeatingBasisFunction(TransformerMixin, BaseEstimator): | ||
|
|
@@ -147,7 +178,10 @@ def __init__(self, n_periods: int = 12, input_range=None, width: float = 1.0): | |
| self.n_periods = n_periods | ||
| self.input_range = input_range | ||
| self.width = width | ||
|
|
||
|
|
||
| def get_feature_names_out(self, input_features=None): | ||
| return [f"{input_features[0]}_rbf{str(i)}" for i in range(self.n_periods)] | ||
|
|
||
| def fit(self, X, y=None): | ||
| """Fit the transformer to the input data and compute the basis functions. | ||
|
|
||
|
|
@@ -163,6 +197,7 @@ def fit(self, X, y=None): | |
| self : _RepeatingBasisFunction | ||
| The fitted transformer. | ||
| """ | ||
|
|
||
| X = check_array(X, estimator=self) | ||
|
|
||
| # find min and max for standardization if not given explicitly | ||
|
|
@@ -208,7 +243,27 @@ def transform(self, X): | |
|
|
||
| # apply rbf function to series for each basis | ||
| return self._rbf(base_distances) | ||
|
|
||
|
|
||
| def inverse_transform(self, X): | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is awesome ✨ |
||
| """Transform features back to the input range. | ||
| """ | ||
|
|
||
| X = check_array(X, estimator=self, ensure_2d=True) | ||
| check_is_fitted(self, ["bases_", "width_"]) | ||
| if X.shape[1] != self.n_periods: | ||
| raise ValueError(f"X should have exactly one column for each period, it has: {X.shape[1]}") | ||
| # Convert back to distances: | ||
| X = self.width_*np.sqrt(-np.log(X)) | ||
| # Find closest base for each row: | ||
| min_col_indices = np.argmin(X, axis=1) | ||
| # Find direction by comparing distance to next and previous base (modulo circularity) | ||
| directions = np.sign([X[i,(min_col_indices[i] - 1) % self.n_periods]-X[i,(min_col_indices[i] + 1) % self.n_periods] for i in range(X.shape[0])]) | ||
| # Retrieve original value | ||
| Y = self.bases_[min_col_indices] + directions*[X[i,min_col_indices[i]] for i in range(X.shape[0])] | ||
| Y[Y < 0] += 1 | ||
| Y = (self.input_range[1] - self.input_range[0])*Y + self.input_range[0] | ||
| return Y | ||
|
|
||
| def _array_base_distance(self, arr: np.ndarray, base: float) -> np.ndarray: | ||
| """Calculate the distances between all array values and the base, where 0 and 1 are assumed to be at the same | ||
| positions | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -50,3 +50,20 @@ def test_when_rbf_helper_receives_more_than_one_col_raises_value_error(df): | |||||
| rbf_helper_tf = _RepeatingBasisFunction() | ||||||
| with pytest.raises(ValueError): | ||||||
| rbf_helper_tf.fit(X, y) | ||||||
|
|
||||||
| def test_pandas_output(df): | ||||||
| X, y = df[["a", "b", "c", "d"]], df[["e"]] | ||||||
| tf = RepeatingBasisFunction(column=0, n_periods=4, remainder="passthrough") | ||||||
| tf.set_output(transform='pandas') | ||||||
| Z = tf.fit(X, y).transform(X) | ||||||
| assert isinstance(Z, pd.core.frame.DataFrame) | ||||||
|
|
||||||
| def test_inverse_transform(df): | ||||||
| X, y = df[["a", "b", "c", "d"]], df[["e"]] | ||||||
| tf = RepeatingBasisFunction(column=0, n_periods=4, input_range=(0,7), remainder="drop") | ||||||
| Z = tf.fit(X, y).transform(X) | ||||||
| assert np.allclose( | ||||||
| X["a"], | ||||||
| tf.pipeline_.named_transformers_['repeatingbasis'].inverse_transform(Z), | ||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This feels a bit too nested to me. Why don't we
Suggested change
? They are supposed to return the same result, or am I missing something? |
||||||
| rtol=1e-08, | ||||||
| atol=1e-12) | ||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we add what you mention in the PR description to the Notes section of the method's docstring?
In particular I am referring to: