2020import copy
2121import inspect
2222import os
23- import posixpath
2423import shutil
2524import textwrap
2625import urllib
6059 size_str ,
6160 temporary_assignment ,
6261)
63- from .utils .streaming_download_manager import StreamingDownloadManager
62+ from .utils .streaming_download_manager import StreamingDownloadManager , xjoin
6463
6564
6665logger = logging .get_logger (__name__ )
@@ -287,20 +286,11 @@ def __init__(
287286 self .info .features = features
288287
289288 # prepare data dirs
290- self ._cache_dir_root = str (cache_dir or config .HF_DATASETS_CACHE )
291- self ._cache_dir_root = (
292- self ._cache_dir_root if is_remote_url (self ._cache_dir_root ) else os .path .expanduser (self ._cache_dir_root )
293- )
294- path_join = posixpath .join if is_remote_url (self ._cache_dir_root ) else os .path .join
289+ self ._cache_dir_root = os .path .expanduser (cache_dir or config .HF_DATASETS_CACHE )
295290 self ._cache_downloaded_dir = (
296- path_join (self ._cache_dir_root , config .DOWNLOADED_DATASETS_DIR )
291+ xjoin (self ._cache_dir_root , config .DOWNLOADED_DATASETS_DIR )
297292 if cache_dir
298- else str (config .DOWNLOADED_DATASETS_PATH )
299- )
300- self ._cache_downloaded_dir = (
301- self ._cache_downloaded_dir
302- if is_remote_url (self ._cache_downloaded_dir )
303- else os .path .expanduser (self ._cache_downloaded_dir )
293+ else config .DOWNLOADED_DATASETS_PATH
304294 )
305295 self ._cache_dir = self ._build_cache_dir ()
306296 if not is_remote_url (self ._cache_dir_root ):
@@ -450,7 +440,7 @@ def builder_configs(cls):
450440 def cache_dir (self ):
451441 return self ._cache_dir
452442
453- def _relative_data_dir (self , with_version = True , with_hash = True , is_local = True ) -> str :
443+ def _relative_data_dir (self , with_version = True , with_hash = True ) -> str :
454444 """Relative path of this dataset in cache_dir:
455445 Will be:
456446 self.name/self.config.version/self.hash/
@@ -462,26 +452,19 @@ def _relative_data_dir(self, with_version=True, with_hash=True, is_local=True) -
462452 builder_data_dir = self .name if namespace is None else f"{ namespace } ___{ self .name } "
463453 builder_config = self .config
464454 hash = self .hash
465- path_join = os .path .join if is_local else posixpath .join
466455 if builder_config :
467456 # use the enriched name instead of the name to make it unique
468- builder_data_dir = path_join (builder_data_dir , self .config_id )
457+ builder_data_dir = xjoin (builder_data_dir , self .config_id )
469458 if with_version :
470- builder_data_dir = path_join (builder_data_dir , str (self .config .version ))
459+ builder_data_dir = xjoin (builder_data_dir , str (self .config .version ))
471460 if with_hash and hash and isinstance (hash , str ):
472- builder_data_dir = path_join (builder_data_dir , hash )
461+ builder_data_dir = xjoin (builder_data_dir , hash )
473462 return builder_data_dir
474463
475464 def _build_cache_dir (self ):
476465 """Return the data directory for the current version."""
477- is_local = not is_remote_url (self ._cache_dir_root )
478- path_join = os .path .join if is_local else posixpath .join
479- builder_data_dir = path_join (
480- self ._cache_dir_root , self ._relative_data_dir (with_version = False , is_local = is_local )
481- )
482- version_data_dir = path_join (
483- self ._cache_dir_root , self ._relative_data_dir (with_version = True , is_local = is_local )
484- )
466+ builder_data_dir = xjoin (self ._cache_dir_root , self ._relative_data_dir (with_version = False ))
467+ version_data_dir = xjoin (self ._cache_dir_root , self ._relative_data_dir (with_version = True ))
485468
486469 def _other_versions_on_disk ():
487470 """Returns previous versions on disk."""
0 commit comments