zbw · Lakshmi-bashyam · Jul 25, 2025 · Jul 29, 2025 · Jul 29, 2025 · gmmajal
diff --git a/stwfsapy/predictor.py b/stwfsapy/predictor.py
@@ -29,7 +29,7 @@
 from sklearn.compose import ColumnTransformer
 from sklearn.tree import DecisionTreeClassifier
 from sklearn.feature_extraction.text import TfidfVectorizer
-from scipy.sparse import csr_matrix
+from scipy.sparse import csr_array
 from stwfsapy import thesaurus as t
 from stwfsapy.automata import nfa, construction, conversion, dfa
 from stwfsapy.thesaurus_features import ThesaurusFeatureTransformation
@@ -291,7 +291,7 @@ def _fit_after_init(self, X, y=None):
         self.pipeline_.fit(matches, y=train_y)
         return self
 
-    def predict_proba(self, X) -> csr_matrix:
+    def predict_proba(self, X) -> csr_array:
         """
         Predicts probability scores for each concept per document.
 
@@ -305,7 +305,7 @@ def predict_proba(self, X) -> csr_matrix:
             predictions = self.pipeline_.predict_proba(match_X)[:, 1]
         else:
             predictions = []
-        return self._create_sparse_matrix(
+        return self._create_sparse_array(
             predictions,
             [tpl[0] for tpl in match_X],
             doc_counts
@@ -344,7 +344,7 @@ def suggest_proba(
             in combined
         ]
 
-    def predict(self, X) -> csr_matrix:
+    def predict(self, X) -> csr_array:
         """
         Predicts binary concept match labels for each input text.
 
@@ -358,19 +358,19 @@ def predict(self, X) -> csr_matrix:
             predictions = self.pipeline_.predict(match_X)
         else:
             predictions = []
-        return self._create_sparse_matrix(
+        return self._create_sparse_array(
             predictions,
             [tpl[0] for tpl in match_X],
             doc_counts
         )
 
-    def _create_sparse_matrix(
+    def _create_sparse_array(
             self,
             values: Nl,
             concept_names: List[str],
             doc_counts: List[int]
-            ) -> csr_matrix:
-        return csr_matrix(
+            ) -> csr_array:
+        return csr_array(
             (
                 values,
                 (
@@ -427,7 +427,7 @@ def match_and_extend(
             for inp, truth_refs in zip(inputs, map(str, truth_refss)):
                 text = input_handler(inp)
                 if self.use_txt_vec:
-                    txt_vec = self.text_vectorizer_.transform([inp])[0]
+                    txt_vec = self.text_vectorizer_.transform([inp])
                 else:
                     txt_vec = 0
                 txt_feat = self.text_features_.transform([text])[0]
@@ -452,7 +452,7 @@ def match_and_extend(
             for inp in inputs:
                 text = input_handler(inp)
                 if self.use_txt_vec:
-                    txt_vec = self.text_vectorizer_.transform([inp])[0]
+                    txt_vec = self.text_vectorizer_.transform([inp])
                 else:
                     txt_vec = 0
                 txt_feat = self.text_features_.transform([text])[0]

diff --git a/stwfsapy/tests/predictor_test.py b/stwfsapy/tests/predictor_test.py
@@ -13,14 +13,14 @@
 # limitations under the License.
 
 from stwfsapy.text_features import mk_text_features
-from scipy.sparse import lil_matrix
+from scipy.sparse import lil_array
 from stwfsapy import predictor as p
 import stwfsapy.thesaurus as t
 from stwfsapy.automata.dfa import Dfa
 import stwfsapy.tests.common as c
 from stwfsapy.automata.construction import ConstructionState
 import pytest
-from scipy.sparse import csr_matrix
+from scipy.sparse import csr_array
 import numpy as np
 from sklearn.tree import DecisionTreeClassifier
 from sklearn.compose import ColumnTransformer
@@ -62,8 +62,8 @@
     ("9", [0], 0), ("11", [0, 1], 1)]
 
 
-def make_test_result_matrix(values):
-    return csr_matrix((
+def make_test_result_array(values):
+    return csr_array((
         values,
         (
             [
@@ -113,7 +113,7 @@ def no_match_predictor(mocker):
 
 
 def mock_vec_transform(X):
-    ret = lil_matrix((len(X), 5000))
+    ret = lil_array((len(X), 5000))
     for idx, x in enumerate(X):
         ret[idx] = len(x)
     return ret
@@ -145,20 +145,20 @@ def test_result_collection():
     assert [(r[0], list(r[1])) for r in res] == _collection_result
 
 
-def test_sparse_matrix_creation():
+def test_sparse_array_creation():
     predictor = p.StwfsapyPredictor(None, None, None, None)
     predictor.concept_map_ = _concept_map
-    res = predictor._create_sparse_matrix(
+    res = predictor._create_sparse_array(
         _predictions[:, 1],
         [c[0] for c in _concepts_with_text],
         _doc_counts
     )
     assert res.shape[0] == len(_doc_counts)
     assert res.shape[1] == 23
     for i, count in enumerate(_doc_counts):
-        row = res.getrow(i)
+        row = res[[i], :]
         slice_start = sum(_doc_counts[:i])
-        assert row.getnnz() == count
+        assert row.nnz == count
         # reverse slices because of mapping.
         assert list(row.nonzero()[1]) == list(reversed([
             22-i for i in _concepts[slice_start: slice_start+count]]))
@@ -418,7 +418,7 @@ def check_fit_arg(vec_fun, text_feature_fun, txt, actual, expected):
 def test_predict(mocked_predictor):
     res = mocked_predictor.predict([])
     assert (
-        res.toarray() == make_test_result_matrix(_classifications).toarray()
+        res.toarray() == make_test_result_array(_classifications).toarray()
         ).all()
     mocked_predictor.match_and_extend.assert_called_once_with(
         []
@@ -431,7 +431,7 @@ def test_predict(mocked_predictor):
 def test_predict_proba(mocked_predictor):
     res = mocked_predictor.predict_proba([])
     assert (
-        res.toarray() == make_test_result_matrix(
+        res.toarray() == make_test_result_array(
             _predictions[:, 1]).toarray()).all()
     mocked_predictor.match_and_extend.assert_called_once_with(
         []
@@ -458,13 +458,13 @@ def test_suggest(mocked_predictor):
 
 def test_predict_no_match(no_match_predictor):
     res = no_match_predictor.predict([])
-    assert res.getnnz() == 0
+    assert res.nnz == 0
     assert res.shape == (3, len(_concept_map))
 
 
 def test_predict_proba_no_match(no_match_predictor):
     res = no_match_predictor.predict_proba([])
-    assert res.getnnz() == 0
+    assert res.nnz == 0
     assert res.shape == (3, len(_concept_map))
 
 

diff --git a/stwfsapy/tests/thesaurus_features_test.py b/stwfsapy/tests/thesaurus_features_test.py
@@ -19,7 +19,7 @@
 from stwfsapy import thesaurus_features as tf
 from stwfsapy.tests.thesaurus import common as tc
 from stwfsapy.tests import common as c
-from scipy.sparse import coo_matrix, csr_matrix
+from scipy.sparse import coo_array, csr_array
 from sklearn.exceptions import NotFittedError
 import pytest
 
@@ -46,7 +46,7 @@ def test_unfitted_raises():
 def test_transform():
     trans = tf.ThesaurusFeatureTransformation(None, None, None, None)
     trans.mapping_ = {
-        'a': coo_matrix([[1]]), 'b': coo_matrix([[2]]), 'c': coo_matrix([[3]])}
+        'a': coo_array([[1]]), 'b': coo_array([[2]]), 'c': coo_array([[3]])}
     res = trans.transform(['c', 'c', 'a'])
     assert (res.toarray() == array([[3], [3], [1]])).all()
 
@@ -70,15 +70,15 @@ def test_fit(full_graph):
         assert x.shape[1] == 6
     # Can not test positions because retrieval from graph is not deterministic.
     # Therefore, test non zero entries only.
-    assert mapping[c.test_concept_uri_0_0].getnnz() == 1
-    assert mapping[c.test_concept_uri_01_0].getnnz() == 2
-    assert mapping[c.test_concept_uri_01_00].getnnz() == 2
-    assert mapping[c.test_concept_uri_10_0].getnnz() == 2
-    assert mapping[c.test_concept_uri_10_1].getnnz() == 2
-    assert mapping[c.test_concept_uri_100_0].getnnz() == 3
-    assert mapping[c.test_concept_uri_100_00].getnnz() == 3
-    assert mapping[c.test_concept_uri_100_01].getnnz() == 3
-    assert mapping[c.test_concept_uri_100_02].getnnz() == 3
+    assert mapping[c.test_concept_uri_0_0].nnz == 1
+    assert mapping[c.test_concept_uri_01_0].nnz == 2
+    assert mapping[c.test_concept_uri_01_00].nnz == 2
+    assert mapping[c.test_concept_uri_10_0].nnz == 2
+    assert mapping[c.test_concept_uri_10_1].nnz == 2
+    assert mapping[c.test_concept_uri_100_0].nnz == 3
+    assert mapping[c.test_concept_uri_100_00].nnz == 3
+    assert mapping[c.test_concept_uri_100_01].nnz == 3
+    assert mapping[c.test_concept_uri_100_02].nnz == 3
 
 
 def test_transform_unknown():
@@ -90,14 +90,14 @@ def test_transform_unknown():
 
     feature_dim = 12
     trans.feature_dim_ = feature_dim
-    known = csr_matrix(([1], ([0], [4])), shape=(1, feature_dim))
+    known = csr_array(([1], ([0], [4])), shape=(1, feature_dim))
     trans.mapping_ = {'key': known}
     random_results = trans.transform([
         'some random stuff edsfysdfhjsedf',
         'key'])
     assert random_results.shape == (2, feature_dim)
-    assert random_results.getrow(0).getnnz() == 0
-    assert random_results.getrow(1).getnnz() == 1
+    assert random_results[[0], :].nnz == 0
+    assert random_results[[1], :].nnz == 1
 
 
 def test_empty_relation(full_graph):
@@ -110,4 +110,4 @@ def test_empty_relation(full_graph):
     trans.fit([], [])
     features = trans.transform(['empty'])
     assert features.shape == (1, 1)
-    assert features.getnnz() == 0
+    assert features.nnz == 0
diff --git a/stwfsapy/tests/util/passthrough_transformer_test.py b/stwfsapy/tests/util/passthrough_transformer_test.py
@@ -31,9 +31,9 @@ def test_array_input():
 
 def test_sparse_input():
     in_feat = [
-        sp.lil_matrix(np.array([[1, 0, 0]])),
-        sp.lil_matrix(np.array([[0, 7, 0]])),
-        sp.lil_matrix(np.array([[0, 0, -3]]))
+        sp.lil_array(np.array([[1, 0, 0]])),
+        sp.lil_array(np.array([[0, 7, 0]])),
+        sp.lil_array(np.array([[0, 0, -3]]))
     ]
     pt = PassthroughTransformer()
     out_feat = pt.transform(in_feat)

diff --git a/stwfsapy/thesaurus_features.py b/stwfsapy/thesaurus_features.py
@@ -15,7 +15,7 @@
 
 from typing import Set, Iterable, Tuple, DefaultDict
 import rdflib
-from scipy.sparse import csr_matrix, vstack
+from scipy.sparse import csr_array, vstack
 from sklearn.base import BaseEstimator, TransformerMixin
 from sklearn.exceptions import NotFittedError
 from stwfsapy import thesaurus as t
@@ -68,7 +68,7 @@ def fit(self, X=None, y=None, **kwargs):
         }
         self.feature_dim_ = max(len(thesaurus_indices), 1)
         self.mapping_ = {
-            str(concept): csr_matrix(
+            str(concept): csr_array(
                 (
                     [1 for _ in thesaurii],
                     (
@@ -91,13 +91,13 @@ def _transform_single(self, x):
         try:
             res = self.mapping_[x]
         except KeyError:
-            res = csr_matrix(
+            res = csr_array(
                 ([], ([], [])),
                 shape=(1, self.feature_dim_)
             )
         return res
 
-    def transform(self, X) -> csr_matrix:
+    def transform(self, X) -> csr_array:
         if self.mapping_ is None:
             raise NotFittedError
         return vstack([self._transform_single(x) for x in X])