Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/datasets/streaming.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from .utils.logging import get_logger
from .utils.patching import patch_submodule
from .utils.streaming_download_manager import xdirname, xjoin, xopen, xpathjoin, xpathopen
from .utils.streaming_download_manager import xdirname, xjoin, xopen, xpathjoin, xpathopen, xpathstem, xpathsuffix


logger = get_logger(__name__)
Expand Down Expand Up @@ -43,3 +43,5 @@ def extend_module_for_streaming(module_path, use_auth_token: Optional[Union[str,
patch.object(module.Path, "joinpath", xpathjoin).start()
patch.object(module.Path, "__truediv__", xpathjoin).start()
patch.object(module.Path, "open", xpathopen).start()
patch.object(module.Path, "stem", property(fget=xpathstem)).start()
patch.object(module.Path, "suffix", property(fget=xpathsuffix)).start()
26 changes: 25 additions & 1 deletion src/datasets/utils/streaming_download_manager.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os
import re
import time
from pathlib import Path
from pathlib import Path, PurePosixPath
from typing import Optional, Tuple

import fsspec
Expand Down Expand Up @@ -180,6 +180,30 @@ def xpathopen(path: Path, **kwargs):
return xopen(_as_posix(path), **kwargs)


def xpathstem(path: Path):
    """Stem of a :obj:`~pathlib.Path`, usable for both local paths and remote URLs.

    The path is converted to a string with ``_as_posix``; only the part that
    precedes the first ``"::"`` separator is kept, and its stem is computed
    with POSIX path rules.

    Args:
        path (:obj:`~pathlib.Path`): Calling Path instance.

    Returns:
        :obj:`str`
    """
    posix_str = _as_posix(path)
    # Everything after the first "::" is ignored when computing the stem.
    first_hop, _, _ = posix_str.partition("::")
    return PurePosixPath(first_hop).stem


def xpathsuffix(path: Path):
    """Suffix of a :obj:`~pathlib.Path`, usable for both local paths and remote URLs.

    The path is converted to a string with ``_as_posix``; only the part that
    precedes the first ``"::"`` separator is kept, and its suffix is computed
    with POSIX path rules.

    Args:
        path (:obj:`~pathlib.Path`): Calling Path instance.

    Returns:
        :obj:`str`
    """
    posix_str = _as_posix(path)
    # Everything after the first "::" is ignored when computing the suffix.
    first_hop, _, _ = posix_str.partition("::")
    return PurePosixPath(first_hop).suffix


class StreamingDownloadManager(object):
"""
Download manager that uses the "::" separator to navigate through (possibly remote) compressed archives.
Expand Down
26 changes: 26 additions & 0 deletions tests/test_streaming_download_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
xopen,
xpathjoin,
xpathopen,
xpathstem,
xpathsuffix,
)

from .utils import require_lz4, require_zstandard
Expand Down Expand Up @@ -124,6 +126,30 @@ def test_xopen_remote():
assert list(f) == TEST_URL_CONTENT.splitlines(keepends=True)


@pytest.mark.parametrize(
    ("raw_path", "expected_stem"),
    [
        # Chained URL: only the part before "::" contributes to the stem.
        ("zip://file.txt::https://host.com/archive.zip", "file"),
        # Relative local path.
        ("file.txt", "file"),
        # Absolute local path.
        (Path().resolve().joinpath("file.txt").as_posix(), "file"),
    ],
)
def test_xpathstem(raw_path, expected_stem):
    """Check that ``xpathstem`` returns the expected stem for each input path."""
    stem = xpathstem(Path(raw_path))
    assert stem == expected_stem


@pytest.mark.parametrize(
    ("raw_path", "expected_suffix"),
    [
        # Chained URL: only the part before "::" contributes to the suffix.
        ("zip://file.txt::https://host.com/archive.zip", ".txt"),
        # Relative local path.
        ("file.txt", ".txt"),
        # Absolute local path.
        (Path().resolve().joinpath("file.txt").as_posix(), ".txt"),
    ],
)
def test_xpathsuffix(raw_path, expected_suffix):
    """Check that ``xpathsuffix`` returns the expected suffix for each input path."""
    suffix = xpathsuffix(Path(raw_path))
    assert suffix == expected_suffix


@pytest.mark.parametrize("urlpath", [r"C:\\foo\bar.txt", "/foo/bar.txt", "https://f.oo/bar.txt"])
def test_streaming_dl_manager_download_dummy_path(urlpath):
dl_manager = StreamingDownloadManager()
Expand Down