Skip to content

Commit 3fbd664

Browse files
committed
fix xisfile, xgetsize, xisdir, xlistdir in private repo
1 parent 0e1c629 commit 3fbd664

File tree

1 file changed

+33 -17 lines changed

1 file changed

+33 -17 lines changed

src/datasets/download/streaming_download_manager.py

Lines changed: 33 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -234,10 +234,14 @@ def xisfile(path, use_auth_token: Optional[Union[str, bool]] = None) -> bool:
234234
if is_local_path(main_hop):
235235
return os.path.isfile(path)
236236
else:
237-
if rest_hops and fsspec.get_fs_token_paths(rest_hops[0])[0].protocol == "https":
238-
storage_options = {
239-
"https": {"headers": get_authentication_headers_for_url(rest_hops[0], use_auth_token=use_auth_token)}
240-
}
237+
if not rest_hops and (main_hop.startswith("http://") or main_hop.startswith("https://")):
238+
main_hop, http_kwargs = _prepare_http_url_kwargs(main_hop, use_auth_token=use_auth_token)
239+
storage_options = http_kwargs
240+
elif rest_hops and (rest_hops[0].startswith("http://") or rest_hops[0].startswith("https://")):
241+
url = rest_hops[0]
242+
url, http_kwargs = _prepare_http_url_kwargs(url, use_auth_token=use_auth_token)
243+
storage_options = {"https": http_kwargs}
244+
path = "::".join([main_hop, url, *rest_hops[1:]])
241245
else:
242246
storage_options = None
243247
fs, *_ = fsspec.get_fs_token_paths(path, storage_options=storage_options)
@@ -257,10 +261,14 @@ def xgetsize(path, use_auth_token: Optional[Union[str, bool]] = None) -> int:
257261
if is_local_path(main_hop):
258262
return os.path.getsize(path)
259263
else:
260-
if rest_hops and fsspec.get_fs_token_paths(rest_hops[0])[0].protocol == "https":
261-
storage_options = {
262-
"https": {"headers": get_authentication_headers_for_url(rest_hops[0], use_auth_token=use_auth_token)}
263-
}
264+
if not rest_hops and (main_hop.startswith("http://") or main_hop.startswith("https://")):
265+
main_hop, http_kwargs = _prepare_http_url_kwargs(main_hop, use_auth_token=use_auth_token)
266+
storage_options = http_kwargs
267+
elif rest_hops and (rest_hops[0].startswith("http://") or rest_hops[0].startswith("https://")):
268+
url = rest_hops[0]
269+
url, http_kwargs = _prepare_http_url_kwargs(url, use_auth_token=use_auth_token)
270+
storage_options = {"https": http_kwargs}
271+
path = "::".join([main_hop, url, *rest_hops[1:]])
264272
else:
265273
storage_options = None
266274
fs, *_ = fsspec.get_fs_token_paths(path, storage_options=storage_options)
@@ -285,10 +293,14 @@ def xisdir(path, use_auth_token: Optional[Union[str, bool]] = None) -> bool:
285293
if is_local_path(main_hop):
286294
return os.path.isdir(path)
287295
else:
288-
if rest_hops and fsspec.get_fs_token_paths(rest_hops[0])[0].protocol == "https":
289-
storage_options = {
290-
"https": {"headers": get_authentication_headers_for_url(rest_hops[0], use_auth_token=use_auth_token)}
291-
}
296+
if not rest_hops and (main_hop.startswith("http://") or main_hop.startswith("https://")):
297+
main_hop, http_kwargs = _prepare_http_url_kwargs(main_hop, use_auth_token=use_auth_token)
298+
storage_options = http_kwargs
299+
elif rest_hops and (rest_hops[0].startswith("http://") or rest_hops[0].startswith("https://")):
300+
url = rest_hops[0]
301+
url, http_kwargs = _prepare_http_url_kwargs(url, use_auth_token=use_auth_token)
302+
storage_options = {"https": http_kwargs}
303+
path = "::".join([main_hop, url, *rest_hops[1:]])
292304
else:
293305
storage_options = None
294306
fs, *_ = fsspec.get_fs_token_paths(path, storage_options=storage_options)
@@ -463,14 +475,18 @@ def xlistdir(path: str, use_auth_token: Optional[Union[str, bool]] = None) -> Li
463475
return os.listdir(path)
464476
else:
465477
# globbing inside a zip in a private repo requires authentication
466-
if rest_hops and fsspec.get_fs_token_paths(rest_hops[0])[0].protocol == "https":
467-
storage_options = {
468-
"https": {"headers": get_authentication_headers_for_url(rest_hops[0], use_auth_token=use_auth_token)}
469-
}
478+
if not rest_hops and (main_hop.startswith("http://") or main_hop.startswith("https://")):
479+
main_hop, http_kwargs = _prepare_http_url_kwargs(main_hop, use_auth_token=use_auth_token)
480+
storage_options = http_kwargs
481+
elif rest_hops and (rest_hops[0].startswith("http://") or rest_hops[0].startswith("https://")):
482+
url = rest_hops[0]
483+
url, http_kwargs = _prepare_http_url_kwargs(url, use_auth_token=use_auth_token)
484+
storage_options = {"https": http_kwargs}
485+
path = "::".join([main_hop, url, *rest_hops[1:]])
470486
else:
471487
storage_options = None
472488
fs, *_ = fsspec.get_fs_token_paths(path, storage_options=storage_options)
473-
objects = fs.listdir(main_hop.split("://")[1])
489+
objects = fs.listdir(main_hop)
474490
return [os.path.basename(obj["name"]) for obj in objects]
475491

476492

0 commit comments

Comments
 (0)