11import os
2+ import re
23from pathlib import Path
34
45import pytest
2122TEST_URL_CONTENT = "foo\n bar\n foobar"
2223
2324
25+ def _readd_double_slash_removed_by_path (path_as_posix : str ) -> str :
26+ """Path(...) on an url path like zip://file.txt::http://host.com/data.zip
27+ converts the :// to :/
28+ This function readds the ://
29+
30+ It handles cases like:
31+
32+ - https://host.com/data.zip
33+ - C://data.zip
34+ - zip://file.txt::https://host.com/data.zip
35+ - zip://file.txt::/Users/username/data.zip
36+ - zip://file.txt::C://data.zip
37+
38+ Args:
39+ path_as_posix (str): output of Path(...).as_posix()
40+
41+ Returns:
42+ str: the url path with :// instead of :/
43+ """
44+ return re .sub ("([A-z]:/)([A-z:])" , r"\g<1>/\g<2>" , path_as_posix )
45+
46+
2447@pytest .mark .parametrize (
2548 "input_path, expected_path" ,
2649 [("zip:/test.txt::/Users/username/bar.zip" , "zip://test.txt::/Users/username/bar.zip" )],
@@ -32,6 +55,7 @@ def test_as_posix(input_path, expected_path):
3255@pytest .mark .parametrize (
3356 "input_path, paths_to_join, expected_path" ,
3457 [
58+ (str (Path (__file__ ).resolve ().parent ), (Path (__file__ ).name ,), str (Path (__file__ ).resolve ())),
3559 ("https://host.com/archive.zip" , ("file.txt" ,), "https://host.com/archive.zip/file.txt" ),
3660 (
3761 "zip://::https://host.com/archive.zip" ,
@@ -57,11 +81,35 @@ def test_as_posix(input_path, expected_path):
5781)
def test_xjoin(input_path, paths_to_join, expected_path):
    """Check ``xjoin`` (str input) and ``xpathjoin`` (``Path`` input) against ``expected_path``."""

    def _normalized(path):
        # Pass the value through Path(...).as_posix() and re-add the "://"
        # that this conversion removes, so both sides are compared in the same form.
        return _readd_double_slash_removed_by_path(Path(path).as_posix())

    # String API: compare the normalized result with the normalized expectation.
    joined_str = xjoin(input_path, *paths_to_join)
    assert _normalized(joined_str) == _normalized(expected_path)

    # Path API: the result is compared directly with Path(expected_path).
    joined_path = xpathjoin(Path(input_path), *paths_to_join)
    assert joined_path == Path(expected_path)
6388
6489
@pytest.mark.parametrize(
    "input_path, expected_path",
    [
        # Local file -> its containing directory.
        (str(Path(__file__).resolve()), str(Path(__file__).resolve().parent)),
        # Plain URL.
        ("https://host.com/archive.zip", "https://host.com"),
        # Chained URLs: the part before "::" is the one whose last segment is dropped.
        ("zip://file.txt::https://host.com/archive.zip", "zip://::https://host.com/archive.zip"),
        ("zip://folder/file.txt::https://host.com/archive.zip", "zip://folder::https://host.com/archive.zip"),
    ],
)
def test_xdirname(input_path, expected_path):
    """Check that ``xdirname(input_path)`` equals ``expected_path`` once both are normalized."""
    from datasets.utils.streaming_download_manager import xdirname

    def _normalized(path):
        # Pass the value through Path(...).as_posix() and re-add the "://"
        # that this conversion removes, so both sides are compared in the same form.
        return _readd_double_slash_removed_by_path(Path(path).as_posix())

    dirname = xdirname(input_path)
    assert _normalized(dirname) == _normalized(expected_path)
111+
112+
65113def test_xopen_local (text_path ):
66114 with xopen (text_path , encoding = "utf-8" ) as f , open (text_path , encoding = "utf-8" ) as expected_file :
67115 assert list (f ) == list (expected_file )
@@ -99,7 +147,8 @@ def test_streaming_dl_manager_download_and_extract_no_extraction(urlpath):
99147def test_streaming_dl_manager_extract (text_gz_path , text_path ):
100148 dl_manager = StreamingDownloadManager ()
101149 output_path = dl_manager .extract (text_gz_path )
102- path = os .path .basename (text_gz_path ).rstrip (".gz" )
150+ path = os .path .basename (text_gz_path )
151+ path = path [: path .rindex ("." )]
103152 assert output_path == f"gzip://{ path } ::{ text_gz_path } "
104153 fsspec_open_file = xopen (output_path , encoding = "utf-8" )
105154 with fsspec_open_file as f , open (text_path , encoding = "utf-8" ) as expected_file :
@@ -109,7 +158,8 @@ def test_streaming_dl_manager_extract(text_gz_path, text_path):
109158def test_streaming_dl_manager_download_and_extract_with_extraction (text_gz_path , text_path ):
110159 dl_manager = StreamingDownloadManager ()
111160 output_path = dl_manager .download_and_extract (text_gz_path )
112- path = os .path .basename (text_gz_path ).rstrip (".gz" )
161+ path = os .path .basename (text_gz_path )
162+ path = path [: path .rindex ("." )]
113163 assert output_path == f"gzip://{ path } ::{ text_gz_path } "
114164 fsspec_open_file = xopen (output_path , encoding = "utf-8" )
115165 with fsspec_open_file as f , open (text_path , encoding = "utf-8" ) as expected_file :
0 commit comments