Skip to content

Commit ea261dd

Browse files
Support push_to_hub without org/user to default to logged-in user (huggingface#6629)
Revert "Support push_to_hub canonical datasets (huggingface#6519)". This reverts commit a887ee7.
1 parent 991169e commit ea261dd

File tree

3 files changed

+14
-6
lines changed

3 files changed

+14
-6
lines changed

src/datasets/arrow_dataset.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5384,13 +5384,14 @@ def push_to_hub(
53845384

53855385
api = HfApi(endpoint=config.HF_ENDPOINT, token=token)
53865386

5387-
_ = api.create_repo(
5387+
repo_url = api.create_repo(
53885388
repo_id,
53895389
token=token,
53905390
repo_type="dataset",
53915391
private=private,
53925392
exist_ok=True,
53935393
)
5394+
repo_id = repo_url.repo_id
53945395

53955396
if revision is not None:
53965397
api.create_branch(repo_id, branch=revision, token=token, repo_type="dataset", exist_ok=True)

src/datasets/dataset_dict.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1699,13 +1699,14 @@ def push_to_hub(
16991699

17001700
api = HfApi(endpoint=config.HF_ENDPOINT, token=token)
17011701

1702-
_ = api.create_repo(
1702+
repo_url = api.create_repo(
17031703
repo_id,
17041704
token=token,
17051705
repo_type="dataset",
17061706
private=private,
17071707
exist_ok=True,
17081708
)
1709+
repo_id = repo_url.repo_id
17091710

17101711
if revision is not None:
17111712
api.create_branch(repo_id, branch=revision, token=token, repo_type="dataset", exist_ok=True)

tests/test_upstream_hub.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
import numpy as np
1414
import pytest
1515
from huggingface_hub import DatasetCard, HfApi
16-
from huggingface_hub.utils import RepositoryNotFoundError
1716

1817
from datasets import (
1918
Audio,
@@ -71,9 +70,16 @@ def test_push_dataset_dict_to_hub_name_without_namespace(self, temporary_repo):
7170
local_ds = DatasetDict({"train": ds})
7271

7372
with temporary_repo() as ds_name:
74-
# cannot create a repo without namespace
75-
with pytest.raises(RepositoryNotFoundError):
76-
local_ds.push_to_hub(ds_name.split("/")[-1], token=self._token)
73+
local_ds.push_to_hub(ds_name.split("/")[-1], token=self._token)
74+
hub_ds = load_dataset(ds_name, download_mode="force_redownload")
75+
76+
assert local_ds.column_names == hub_ds.column_names
77+
assert list(local_ds["train"].features.keys()) == list(hub_ds["train"].features.keys())
78+
assert local_ds["train"].features == hub_ds["train"].features
79+
80+
# Ensure that there is a single file on the repository that has the correct name
81+
files = sorted(self._api.list_repo_files(ds_name, repo_type="dataset"))
82+
assert files == [".gitattributes", "README.md", "data/train-00000-of-00001.parquet"]
7783

7884
def test_push_dataset_dict_to_hub_datasets_with_different_features(self, cleanup_repo):
7985
ds_train = Dataset.from_dict({"x": [1, 2, 3], "y": [4, 5, 6]})

0 commit comments

Comments
 (0)