Skip to content

Commit 65c2037

Browse files
committed
test ArrowWriter with filesystem
1 parent 93d5660 commit 65c2037

File tree

3 files changed

+129
-0
lines changed

3 files changed

+129
-0
lines changed

tests/conftest.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from datasets.arrow_dataset import Dataset
1616
from datasets.features import ClassLabel, Features, Sequence, Value
1717

18+
from .fsspec_fixtures import * # noqa: load fsspec fixtures
1819
from .hub_fixtures import * # noqa: load hub fixtures
1920
from .s3_fixtures import * # noqa: load s3 fixtures
2021

tests/fsspec_fixtures.py

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
import posixpath
2+
from pathlib import Path
3+
4+
import fsspec
5+
import pytest
6+
from fsspec.implementations.local import AbstractFileSystem, LocalFileSystem, stringify_path
7+
8+
9+
class MockFileSystem(AbstractFileSystem):
    """Test filesystem for the ``mock://`` protocol, backed by a local directory.

    Every path-taking method strips a leading ``mock://`` prefix, joins the
    remainder onto ``local_root_dir`` and delegates to a wrapped
    ``LocalFileSystem``. Return values are passed through unchanged from the
    wrapped filesystem.
    """

    protocol = "mock"

    def __init__(self, *args, local_root_dir, **kwargs):
        """Create the mock filesystem.

        Args:
            *args: Positional arguments forwarded to ``LocalFileSystem``.
            local_root_dir: Local directory under which mock paths are resolved.
            **kwargs: Keyword arguments forwarded to ``LocalFileSystem``.
        """
        super().__init__()
        self._fs = LocalFileSystem(*args, **kwargs)
        # Stored with forward slashes because paths are joined with posixpath.
        self.local_root_dir = Path(local_root_dir).as_posix()

    def _to_local_path(self, path):
        """Map a ``mock://`` path (or bare path) to a path under ``local_root_dir``."""
        return posixpath.join(self.local_root_dir, self._strip_protocol(path))

    def mkdir(self, path, *args, **kwargs):
        """Delegate ``mkdir`` to the local filesystem at the resolved path."""
        return self._fs.mkdir(self._to_local_path(path), *args, **kwargs)

    def makedirs(self, path, *args, **kwargs):
        """Delegate ``makedirs`` to the local filesystem at the resolved path."""
        return self._fs.makedirs(self._to_local_path(path), *args, **kwargs)

    def rmdir(self, path):
        """Delegate ``rmdir`` to the local filesystem at the resolved path."""
        return self._fs.rmdir(self._to_local_path(path))

    def ls(self, path, *args, **kwargs):
        """Delegate ``ls`` to the local filesystem at the resolved path."""
        return self._fs.ls(self._to_local_path(path), *args, **kwargs)

    def glob(self, path, *args, **kwargs):
        """Delegate ``glob`` to the local filesystem with the resolved pattern."""
        return self._fs.glob(self._to_local_path(path), *args, **kwargs)

    def info(self, path, *args, **kwargs):
        """Delegate ``info`` to the local filesystem at the resolved path."""
        return self._fs.info(self._to_local_path(path), *args, **kwargs)

    def lexists(self, path, *args, **kwargs):
        """Delegate ``lexists`` to the local filesystem at the resolved path."""
        return self._fs.lexists(self._to_local_path(path), *args, **kwargs)

    def cp_file(self, path1, path2, *args, **kwargs):
        """Delegate ``cp_file``; both paths are resolved under the root directory."""
        return self._fs.cp_file(
            self._to_local_path(path1), self._to_local_path(path2), *args, **kwargs
        )

    def get_file(self, path1, path2, *args, **kwargs):
        """Delegate ``get_file``; both paths are resolved under the root directory."""
        return self._fs.get_file(
            self._to_local_path(path1), self._to_local_path(path2), *args, **kwargs
        )

    def put_file(self, path1, path2, *args, **kwargs):
        """Delegate ``put_file``; both paths are resolved under the root directory."""
        return self._fs.put_file(
            self._to_local_path(path1), self._to_local_path(path2), *args, **kwargs
        )

    def mv_file(self, path1, path2, *args, **kwargs):
        """Delegate ``mv_file``; both paths are resolved under the root directory."""
        return self._fs.mv_file(
            self._to_local_path(path1), self._to_local_path(path2), *args, **kwargs
        )

    def rm_file(self, path):
        """Delegate ``rm_file`` to the local filesystem at the resolved path."""
        return self._fs.rm_file(self._to_local_path(path))

    def rm(self, path, *args, **kwargs):
        """Delegate ``rm`` to the local filesystem at the resolved path."""
        return self._fs.rm(self._to_local_path(path), *args, **kwargs)

    def _open(self, path, *args, **kwargs):
        """Delegate ``_open`` to the local filesystem at the resolved path."""
        return self._fs._open(self._to_local_path(path), *args, **kwargs)

    def open(self, path, *args, **kwargs):
        """Delegate ``open`` to the local filesystem at the resolved path."""
        return self._fs.open(self._to_local_path(path), *args, **kwargs)

    def touch(self, path, *args, **kwargs):
        """Delegate ``touch`` to the local filesystem at the resolved path."""
        return self._fs.touch(self._to_local_path(path), *args, **kwargs)

    def created(self, path):
        """Delegate ``created`` to the local filesystem at the resolved path."""
        return self._fs.created(self._to_local_path(path))

    def modified(self, path):
        """Delegate ``modified`` to the local filesystem at the resolved path."""
        return self._fs.modified(self._to_local_path(path))

    @classmethod
    def _parent(cls, path):
        """Return the parent of ``path`` as computed by ``LocalFileSystem._parent``."""
        return LocalFileSystem._parent(path)

    @classmethod
    def _strip_protocol(cls, path):
        """Return ``path`` as a string with a leading ``mock://`` removed, if present."""
        prefix = "mock://"
        path = stringify_path(path)
        if path.startswith(prefix):
            path = path[len(prefix):]
        return path

    def chmod(self, path, *args, **kwargs):
        """Delegate ``chmod`` to the local filesystem at the resolved path."""
        # Previously this called ``self._fs.mkdir``, which created a directory
        # instead of changing permissions.
        return self._fs.chmod(self._to_local_path(path), *args, **kwargs)
107+
108+
109+
@pytest.fixture
def mock_fsspec(monkeypatch):
    """Register ``MockFileSystem`` under the ``mock`` protocol via ``monkeypatch``."""
    protocol_registry = fsspec.registry.target
    monkeypatch.setitem(protocol_registry, "mock", MockFileSystem)
112+
113+
114+
@pytest.fixture
def mockfs(tmp_path_factory, mock_fsspec):
    """Return a ``MockFileSystem`` rooted at a newly created temporary directory."""
    return MockFileSystem(local_root_dir=tmp_path_factory.mktemp("mockfs"))

tests/test_arrow_writer.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,3 +299,14 @@ def test_arrow_writer_closes_stream(raise_exception, tmp_path):
299299
pass
300300
finally:
301301
assert writer.stream.closed
302+
303+
304+
def test_arrow_writer_with_filesystem(mockfs):
    """Write two examples to a ``mock://`` path with ``ArrowWriter`` and check the result."""
    path = "mock://dataset-train.arrow"
    examples = (
        {"col_1": "foo", "col_2": 1},
        {"col_1": "bar", "col_2": 2},
    )
    with ArrowWriter(path=path, storage_options=mockfs.storage_options) as writer:
        for example in examples:
            writer.write(example)
        num_examples, num_bytes = writer.finalize()
    assert num_examples == 2
    assert num_bytes > 0
    assert mockfs.exists(path)

0 commit comments

Comments
 (0)