-
Notifications
You must be signed in to change notification settings - Fork 3
Change sparse matrix to array #96
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -29,7 +29,7 @@ | |
| from sklearn.compose import ColumnTransformer | ||
| from sklearn.tree import DecisionTreeClassifier | ||
| from sklearn.feature_extraction.text import TfidfVectorizer | ||
| from scipy.sparse import csr_matrix | ||
| from scipy.sparse import csr_array | ||
| from stwfsapy import thesaurus as t | ||
| from stwfsapy.automata import nfa, construction, conversion, dfa | ||
| from stwfsapy.thesaurus_features import ThesaurusFeatureTransformation | ||
|
|
@@ -291,7 +291,7 @@ def _fit_after_init(self, X, y=None): | |
| self.pipeline_.fit(matches, y=train_y) | ||
| return self | ||
|
|
||
| def predict_proba(self, X) -> csr_matrix: | ||
| def predict_proba(self, X) -> csr_array: | ||
| """ | ||
| Predicts probability scores for each concept per document. | ||
|
|
||
|
|
@@ -305,7 +305,7 @@ def predict_proba(self, X) -> csr_matrix: | |
| predictions = self.pipeline_.predict_proba(match_X)[:, 1] | ||
| else: | ||
| predictions = [] | ||
| return self._create_sparse_matrix( | ||
| return self._create_sparse_array( | ||
| predictions, | ||
| [tpl[0] for tpl in match_X], | ||
| doc_counts | ||
|
|
@@ -344,7 +344,7 @@ def suggest_proba( | |
| in combined | ||
| ] | ||
|
|
||
| def predict(self, X) -> csr_matrix: | ||
| def predict(self, X) -> csr_array: | ||
| """ | ||
| Predicts binary concept match labels for each input text. | ||
|
|
||
|
|
@@ -358,19 +358,19 @@ def predict(self, X) -> csr_matrix: | |
| predictions = self.pipeline_.predict(match_X) | ||
| else: | ||
| predictions = [] | ||
| return self._create_sparse_matrix( | ||
| return self._create_sparse_array( | ||
| predictions, | ||
| [tpl[0] for tpl in match_X], | ||
| doc_counts | ||
| ) | ||
|
|
||
| def _create_sparse_matrix( | ||
| def _create_sparse_array( | ||
| self, | ||
| values: Nl, | ||
| concept_names: List[str], | ||
| doc_counts: List[int] | ||
| ) -> csr_matrix: | ||
| return csr_matrix( | ||
| ) -> csr_array: | ||
| return csr_array( | ||
| ( | ||
| values, | ||
| ( | ||
|
|
@@ -427,7 +427,7 @@ def match_and_extend( | |
| for inp, truth_refs in zip(inputs, map(str, truth_refss)): | ||
| text = input_handler(inp) | ||
| if self.use_txt_vec: | ||
| txt_vec = self.text_vectorizer_.transform([inp])[0] | ||
| txt_vec = self.text_vectorizer_.transform([inp]) | ||
| else: | ||
| txt_vec = 0 | ||
| txt_feat = self.text_features_.transform([text])[0] | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you explain why over here (line 433) and in line 458, when the
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I looked at what the transform methods are doing for
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The text vectorizer produces a |
||
|
|
@@ -452,7 +452,7 @@ def match_and_extend( | |
| for inp in inputs: | ||
| text = input_handler(inp) | ||
| if self.use_txt_vec: | ||
| txt_vec = self.text_vectorizer_.transform([inp])[0] | ||
| txt_vec = self.text_vectorizer_.transform([inp]) | ||
| else: | ||
| txt_vec = 0 | ||
| txt_feat = self.text_features_.transform([text])[0] | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think in the docstrings for the
`predict()` method we can replace "A sparse matrix of shape ..." with "A sparse array of shape ..." for the sake of consistency.