Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion sparsity/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,21 @@ def traildb_to_coo(db, fieldname):

def to_npz(sf, filename):
    """Write ``sf`` to ``filename`` in numpy's npz format.

    The archive holds the CSR components returned by ``_csr_to_dict`` for
    ``sf.data``, plus three extra entries:

    * ``metadata`` -- a dict whose ``multiindex`` flag records whether
      ``sf.index`` is a ``pd.MultiIndex``,
    * ``frame_index`` -- ``sf.index.values``,
    * ``frame_columns`` -- ``sf.columns.values``.
    """
    payload = _csr_to_dict(sf.data)
    # isinstance already yields a plain bool, so it can be stored directly.
    has_multiindex = isinstance(sf.index, pd.MultiIndex)
    payload['metadata'] = {'multiindex': has_multiindex}
    payload['frame_index'] = sf.index.values
    payload['frame_columns'] = sf.columns.values
    np.savez(filename, **payload)


def read_npz(filename):
    """Read the pieces of a sparse frame back from an npz archive.

    Parameters
    ----------
    filename
        Passed unchanged to ``np.load``.

    Returns
    -------
    tuple
        ``(csr_mat, idx, cols)`` where ``csr_mat`` comes from ``_load_csr``,
        ``idx`` from ``_load_idx_from_npz`` (a ``pd.MultiIndex`` when the
        archive describes one) and ``cols`` is the stored ``frame_columns``
        array.
    """
    # NOTE(review): ``to_npz`` stores a dict under 'metadata', i.e. an object
    # array. Recent NumPy releases default ``np.load`` to allow_pickle=False,
    # which rejects object arrays -- confirm the supported NumPy range before
    # deciding whether to opt in to pickle loading here (only safe for
    # trusted files).
    # The context manager closes the archive's file handle once every array
    # has been read; members are read on item access inside the block.
    with np.load(filename) as loader:
        csr_mat = _load_csr(loader)
        idx = _load_idx_from_npz(loader)
        cols = loader['frame_columns']
    return (csr_mat, idx, cols)


def _csr_to_dict(array):
return dict(data = array.data ,indices=array.indices,
indptr =array.indptr, shape=array.shape)
Expand All @@ -48,6 +52,17 @@ def _load_csr(loader):
shape=loader['shape'])


def _load_idx_from_npz(loader):
idx = loader['frame_index']
try:
if loader['metadata'][()]['multiindex']:
idx = pd.MultiIndex.from_tuples(idx)
except KeyError:
if all(map(lambda x: isinstance(x, tuple), idx)):
idx = pd.MultiIndex.from_tuples(idx)
return idx


def _just_read_array(path):
if path.endswith('hdf') or path.endswith('hdf5'):
return pd.read_hdf(path, '/df').values
Expand Down
21 changes: 21 additions & 0 deletions sparsity/test/test_sparse_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from scipy import sparse

from sparsity import SparseFrame, sparse_one_hot
from sparsity.io import _csr_to_dict

from .conftest import tmpdir

Expand Down Expand Up @@ -217,6 +218,26 @@ def test_set_index(sf_midx):
# assert np.all(sf.loc[[4, 5]].data.todense() == np.identity(5)[[3, 4]])


def test_save_load_multiindex(sf_midx):
    """A MultiIndex must survive an npz round trip, both for files written
    by the current ``to_npz`` and for files written without metadata."""

    def _to_npz_legacy(sf, filename):
        # Writes the archive without the 'metadata' entry, so that reading
        # it exercises the backwards-compatibility path.
        payload = _csr_to_dict(sf.data)
        payload['frame_index'] = sf.index.values
        payload['frame_columns'] = sf.columns.values
        np.savez(filename, **payload)

    with tmpdir() as tmp:
        path = os.path.join(tmp, 'sf.npz')

        # current writer
        sf_midx.to_npz(path)
        loaded = SparseFrame.read_npz(path)
        assert isinstance(loaded.index, pd.MultiIndex)

        # backwards compatibility: same path, overwritten by legacy writer
        _to_npz_legacy(sf_midx, path)
        loaded = SparseFrame.read_npz(path)
        assert isinstance(loaded.index, pd.MultiIndex)


def test_new_column_assign_array():
sf = SparseFrame(np.identity(5))
sf[6] = np.ones(5)
Expand Down