Skip to content

Commit a887ee7

Browse files
Support push_to_hub canonical datasets (huggingface#6519)
* Support push_to_hub canonical datasets
* Fix test
1 parent 3f14920 commit a887ee7

File tree

3 files changed

+6
-14
lines changed

3 files changed

+6
-14
lines changed

src/datasets/arrow_dataset.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5352,14 +5352,13 @@ def push_to_hub(
53525352

53535353
api = HfApi(endpoint=config.HF_ENDPOINT, token=token)
53545354

5355-
repo_url = api.create_repo(
5355+
_ = api.create_repo(
53565356
repo_id,
53575357
token=token,
53585358
repo_type="dataset",
53595359
private=private,
53605360
exist_ok=True,
53615361
)
5362-
repo_id = repo_url.repo_id
53635362

53645363
if revision is not None:
53655364
api.create_branch(repo_id, branch=revision, token=token, repo_type="dataset", exist_ok=True)

src/datasets/dataset_dict.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1679,14 +1679,13 @@ def push_to_hub(
16791679

16801680
api = HfApi(endpoint=config.HF_ENDPOINT, token=token)
16811681

1682-
repo_url = api.create_repo(
1682+
_ = api.create_repo(
16831683
repo_id,
16841684
token=token,
16851685
repo_type="dataset",
16861686
private=private,
16871687
exist_ok=True,
16881688
)
1689-
repo_id = repo_url.repo_id
16901689

16911690
if revision is not None:
16921691
api.create_branch(repo_id, branch=revision, token=token, repo_type="dataset", exist_ok=True)

tests/test_upstream_hub.py

Lines changed: 4 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
import numpy as np
1414
import pytest
1515
from huggingface_hub import DatasetCard, HfApi
16+
from huggingface_hub.utils import RepositoryNotFoundError
1617

1718
from datasets import (
1819
Audio,
@@ -70,16 +71,9 @@ def test_push_dataset_dict_to_hub_name_without_namespace(self, temporary_repo):
7071
local_ds = DatasetDict({"train": ds})
7172

7273
with temporary_repo() as ds_name:
73-
local_ds.push_to_hub(ds_name.split("/")[-1], token=self._token)
74-
hub_ds = load_dataset(ds_name, download_mode="force_redownload")
75-
76-
assert local_ds.column_names == hub_ds.column_names
77-
assert list(local_ds["train"].features.keys()) == list(hub_ds["train"].features.keys())
78-
assert local_ds["train"].features == hub_ds["train"].features
79-
80-
# Ensure that there is a single file on the repository that has the correct name
81-
files = sorted(self._api.list_repo_files(ds_name, repo_type="dataset"))
82-
assert files == [".gitattributes", "README.md", "data/train-00000-of-00001.parquet"]
74+
# cannot create a repo without namespace
75+
with pytest.raises(RepositoryNotFoundError):
76+
local_ds.push_to_hub(ds_name.split("/")[-1], token=self._token)
8377

8478
def test_push_dataset_dict_to_hub_datasets_with_different_features(self, cleanup_repo):
8579
ds_train = Dataset.from_dict({"x": [1, 2, 3], "y": [4, 5, 6]})

0 commit comments

Comments
 (0)