Skip to content

Commit 43714db

Browse files
Use xjoin
1 parent: f910e1e · commit: 43714db

File tree

1 file changed

+10
-27
lines changed

1 file changed

+10
-27
lines changed

src/datasets/builder.py

Lines changed: 10 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,6 @@
2020
import copy
2121
import inspect
2222
import os
23-
import posixpath
2423
import shutil
2524
import textwrap
2625
import urllib
@@ -60,7 +59,7 @@
6059
size_str,
6160
temporary_assignment,
6261
)
63-
from .utils.streaming_download_manager import StreamingDownloadManager
62+
from .utils.streaming_download_manager import StreamingDownloadManager, xjoin
6463

6564

6665
logger = logging.get_logger(__name__)
@@ -287,20 +286,11 @@ def __init__(
287286
self.info.features = features
288287

289288
# prepare data dirs
290-
self._cache_dir_root = str(cache_dir or config.HF_DATASETS_CACHE)
291-
self._cache_dir_root = (
292-
self._cache_dir_root if is_remote_url(self._cache_dir_root) else os.path.expanduser(self._cache_dir_root)
293-
)
294-
path_join = posixpath.join if is_remote_url(self._cache_dir_root) else os.path.join
289+
self._cache_dir_root = os.path.expanduser(cache_dir or config.HF_DATASETS_CACHE)
295290
self._cache_downloaded_dir = (
296-
path_join(self._cache_dir_root, config.DOWNLOADED_DATASETS_DIR)
291+
xjoin(self._cache_dir_root, config.DOWNLOADED_DATASETS_DIR)
297292
if cache_dir
298-
else str(config.DOWNLOADED_DATASETS_PATH)
299-
)
300-
self._cache_downloaded_dir = (
301-
self._cache_downloaded_dir
302-
if is_remote_url(self._cache_downloaded_dir)
303-
else os.path.expanduser(self._cache_downloaded_dir)
293+
else config.DOWNLOADED_DATASETS_PATH
304294
)
305295
self._cache_dir = self._build_cache_dir()
306296
if not is_remote_url(self._cache_dir_root):
@@ -450,7 +440,7 @@ def builder_configs(cls):
450440
def cache_dir(self):
451441
return self._cache_dir
452442

453-
def _relative_data_dir(self, with_version=True, with_hash=True, is_local=True) -> str:
443+
def _relative_data_dir(self, with_version=True, with_hash=True) -> str:
454444
"""Relative path of this dataset in cache_dir:
455445
Will be:
456446
self.name/self.config.version/self.hash/
@@ -462,26 +452,19 @@ def _relative_data_dir(self, with_version=True, with_hash=True, is_local=True) -
462452
builder_data_dir = self.name if namespace is None else f"{namespace}___{self.name}"
463453
builder_config = self.config
464454
hash = self.hash
465-
path_join = os.path.join if is_local else posixpath.join
466455
if builder_config:
467456
# use the enriched name instead of the name to make it unique
468-
builder_data_dir = path_join(builder_data_dir, self.config_id)
457+
builder_data_dir = xjoin(builder_data_dir, self.config_id)
469458
if with_version:
470-
builder_data_dir = path_join(builder_data_dir, str(self.config.version))
459+
builder_data_dir = xjoin(builder_data_dir, str(self.config.version))
471460
if with_hash and hash and isinstance(hash, str):
472-
builder_data_dir = path_join(builder_data_dir, hash)
461+
builder_data_dir = xjoin(builder_data_dir, hash)
473462
return builder_data_dir
474463

475464
def _build_cache_dir(self):
476465
"""Return the data directory for the current version."""
477-
is_local = not is_remote_url(self._cache_dir_root)
478-
path_join = os.path.join if is_local else posixpath.join
479-
builder_data_dir = path_join(
480-
self._cache_dir_root, self._relative_data_dir(with_version=False, is_local=is_local)
481-
)
482-
version_data_dir = path_join(
483-
self._cache_dir_root, self._relative_data_dir(with_version=True, is_local=is_local)
484-
)
466+
builder_data_dir = xjoin(self._cache_dir_root, self._relative_data_dir(with_version=False))
467+
version_data_dir = xjoin(self._cache_dir_root, self._relative_data_dir(with_version=True))
485468

486469
def _other_versions_on_disk():
487470
"""Returns previous versions on disk."""

0 commit comments

Comments
 (0)