Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/datasets/arrow_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@
from .utils import tqdm as hf_tqdm
from .utils.deprecation_utils import deprecated
from .utils.file_utils import estimate_dataset_size
from .utils.hub import preupload_lfs_files
from .utils.hub import list_files_info, preupload_lfs_files
from .utils.info_utils import is_small_dataset
from .utils.metadata import MetadataConfigs
from .utils.py_utils import (
Expand Down Expand Up @@ -5379,7 +5379,7 @@ def push_to_hub(
deletions, deleted_size = [], 0
repo_splits = [] # use a list to keep the order of the splits
repo_files_to_add = [addition.path_in_repo for addition in additions]
for repo_file in api.list_files_info(repo_id, revision=revision, repo_type="dataset", token=token):
for repo_file in list_files_info(api, repo_id=repo_id, revision=revision, repo_type="dataset", token=token):
if repo_file.rfilename == "README.md":
repo_with_dataset_card = True
elif repo_file.rfilename == config.DATASETDICT_INFOS_FILENAME:
Expand Down
3 changes: 2 additions & 1 deletion src/datasets/dataset_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from .utils import logging
from .utils.deprecation_utils import deprecated
from .utils.doc_utils import is_documented_by
from .utils.hub import list_files_info
from .utils.metadata import MetadataConfigs
from .utils.py_utils import asdict, glob_pattern_to_regex, string_to_dict
from .utils.typing import PathLike
Expand Down Expand Up @@ -1722,7 +1723,7 @@ def push_to_hub(
repo_splits = [] # use a list to keep the order of the splits
deletions = []
repo_files_to_add = [addition.path_in_repo for addition in additions]
for repo_file in api.list_files_info(repo_id, revision=revision, repo_type="dataset", token=token):
for repo_file in list_files_info(api, repo_id=repo_id, revision=revision, repo_type="dataset", token=token):
if repo_file.rfilename == "README.md":
repo_with_dataset_card = True
elif repo_file.rfilename == config.DATASETDICT_INFOS_FILENAME:
Expand Down
14 changes: 14 additions & 0 deletions src/datasets/utils/hub.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from functools import partial

from huggingface_hub import HfApi, hf_hub_url
from huggingface_hub.hf_api import RepoFile
from packaging import version
from requests import ConnectionError, HTTPError

Expand Down Expand Up @@ -45,5 +46,18 @@ def preupload_lfs_files(hf_api: HfApi, **kwargs):
hf_api.preupload_lfs_files(**kwargs)


# `list_files_info` is deprecated in favor of `list_repo_tree` in `huggingface_hub>=0.20.0`
if config.HF_HUB_VERSION < version.parse("0.20.0"):

    def list_files_info(hf_api: HfApi, **kwargs):
        """Yield the file entries of a Hub repo via `HfApi.list_files_info`.

        Args:
            hf_api: The `HfApi` client used to query the Hub.
            **kwargs: Forwarded unchanged to `hf_api.list_files_info`
                (callers in this package pass `repo_id`, `revision`,
                `repo_type` and `token`).

        Yields:
            Whatever `hf_api.list_files_info` yields, in the same order.
        """
        yield from hf_api.list_files_info(**kwargs)

else:

    def list_files_info(hf_api: HfApi, **kwargs):
        """Yield the file entries of a Hub repo via `HfApi.list_repo_tree`.

        Compatibility shim that keeps the `list_files_info` calling
        convention on `huggingface_hub>=0.20.0`, where that method is
        deprecated in favor of `list_repo_tree`.

        Args:
            hf_api: The `HfApi` client used to query the Hub.
            **kwargs: Forwarded to `hf_api.list_repo_tree`; `recursive` is
                always forced to `True` (see below).
                NOTE(review): a `paths=` keyword accepted by the old API is
                not translated here — confirm no caller relies on it.

        Yields:
            Only the `RepoFile` entries of the tree; other entries returned
            by `list_repo_tree` (e.g. folders) are skipped.
        """
        # `list_repo_tree` only lists the top level unless asked to recurse,
        # whereas the deprecated `list_files_info` returned every file in the
        # repo. Force recursion so files in sub-folders are still reported.
        kwargs = {**kwargs, "recursive": True}
        for repo_path in hf_api.list_repo_tree(**kwargs):
            if isinstance(repo_path, RepoFile):
                yield repo_path


# backward compatibility
hf_hub_url = partial(hf_hub_url, repo_type="dataset")