Skip to content

Commit fcfcc95

Browse files
Refactor Hub tests (#4729)
* Replace with_staging_testing with staging_hub_config fixture
* Replace setUp/tearDown with set_staging_access_token fixture
* Reset class token
* Create cleanup_repo and temporary_repo fixtures
* Refactor test_upstream_hub with fixtures
* Rename staging variables to ci_hub
1 parent 6a1c6b1 commit fcfcc95

File tree

2 files changed

+233
-299
lines changed

2 files changed

+233
-299
lines changed

tests/fixtures/hub.py

Lines changed: 69 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1+
import os.path
12
import time
3+
from contextlib import contextmanager
24
from unittest.mock import patch
35

46
import pytest
@@ -8,36 +10,79 @@
810
from datasets.utils._hf_hub_fixes import create_repo, delete_repo
911

1012

11-
USER = "__DUMMY_TRANSFORMERS_USER__"
12-
FULL_NAME = "Dummy User"
13-
TOKEN = "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"
13+
CI_HUB_USER = "__DUMMY_TRANSFORMERS_USER__"
14+
CI_HUB_USER_FULL_NAME = "Dummy User"
15+
CI_HUB_USER_TOKEN = "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"
1416

15-
ENDPOINT_STAGING = "https://hub-ci.huggingface.co"
16-
ENDPOINT_STAGING_DATASETS_URL = ENDPOINT_STAGING + "/datasets/{repo_id}/resolve/{revision}/{path}"
17+
CI_HUB_ENDPOINT = "https://hub-ci.huggingface.co"
18+
CI_HUB_DATASETS_URL = CI_HUB_ENDPOINT + "/datasets/{repo_id}/resolve/{revision}/{path}"
19+
CI_HUB_TOKEN_PATH = os.path.expanduser("~/.huggingface/hub_ci_token")
20+
21+
22+
@pytest.fixture
def ci_hub_config(monkeypatch):
    """Point the ``datasets`` configuration at the CI Hub.

    Sets ``datasets.config.HF_ENDPOINT`` and ``datasets.config.HUB_DATASETS_URL``
    to their CI Hub values through ``monkeypatch``.
    """
    overrides = (
        ("datasets.config.HF_ENDPOINT", CI_HUB_ENDPOINT),
        ("datasets.config.HUB_DATASETS_URL", CI_HUB_DATASETS_URL),
    )
    for target, value in overrides:
        monkeypatch.setattr(target, value)
26+
27+
28+
@pytest.fixture
def ci_hub_token_path(monkeypatch):
    """Redirect ``HfFolder.path_token`` to the CI Hub token file.

    The attribute is set to ``CI_HUB_TOKEN_PATH`` through ``monkeypatch``.
    """
    target = "huggingface_hub.hf_api.HfFolder.path_token"
    monkeypatch.setattr(target, CI_HUB_TOKEN_PATH)
31+
32+
33+
@pytest.fixture
def set_ci_hub_access_token(ci_hub_config, ci_hub_token_path):
    """Install the CI Hub user token for one test and remove it afterwards.

    Depends on ``ci_hub_config`` and ``ci_hub_token_path`` so that both are
    set up before the token is written.

    Setup calls ``set_access_token`` on a CI Hub ``HfApi`` client and then
    ``HfFolder.save_token``. Teardown calls ``HfFolder.delete_token`` and then
    ``unset_access_token``.
    """
    api = HfApi(endpoint=CI_HUB_ENDPOINT)
    api.set_access_token(CI_HUB_USER_TOKEN)
    # From here on the access token is set, so it must be unset on every exit
    # path, including a failure in save_token before the test even starts.
    try:
        HfFolder.save_token(CI_HUB_USER_TOKEN)
        yield
    finally:
        try:
            HfFolder.delete_token()
        finally:
            # Runs even if deleting the saved token fails.
            api.unset_access_token()
1741

1842

1943
@pytest.fixture(scope="session")
2044
def hf_api():
21-
return HfApi(endpoint=ENDPOINT_STAGING)
45+
return HfApi(endpoint=CI_HUB_ENDPOINT)
2246

2347

2448
@pytest.fixture(scope="session")
2549
def hf_token(hf_api: HfApi):
26-
hf_api.set_access_token(TOKEN)
27-
HfFolder.save_token(TOKEN)
50+
hf_api.set_access_token(CI_HUB_USER_TOKEN)
51+
HfFolder.save_token(CI_HUB_USER_TOKEN)
2852

29-
yield TOKEN
53+
yield CI_HUB_USER_TOKEN
3054
try:
3155
hf_api.unset_access_token()
3256
except requests.exceptions.HTTPError:
3357
pass
3458

3559

60+
@pytest.fixture
def cleanup_repo(hf_api):
    """Provide a callable that deletes a dataset repository via ``hf_api``.

    The returned function takes a ``repo_id`` of the form
    ``"<organization>/<name>"`` and calls ``delete_repo`` with the CI Hub
    user token and ``repo_type="dataset"``.
    """

    def _cleanup_repo(repo_id):
        # Unpacking raises ValueError unless repo_id contains exactly one "/".
        namespace, repo_name = repo_id.split("/")
        delete_repo(
            hf_api=hf_api,
            name=repo_name,
            organization=namespace,
            token=CI_HUB_USER_TOKEN,
            repo_type="dataset",
        )

    return _cleanup_repo
67+
68+
69+
@pytest.fixture
def temporary_repo(cleanup_repo):
    """Provide a context-manager factory that cleans up a repo on exit.

    ``with temporary_repo(repo_id) as repo_id:`` yields the same ``repo_id``
    back and calls ``cleanup_repo(repo_id)`` when the block exits, whether it
    finished normally or raised. The context manager does not create the
    repository itself.
    """

    @contextmanager
    def _temporary_repo(repo_id):
        try:
            # Hand the id back unchanged so it can be bound with ``as``.
            yield repo_id
        finally:
            cleanup_repo(repo_id)

    return _temporary_repo
79+
80+
3681
@pytest.fixture(scope="session")
3782
def hf_private_dataset_repo_txt_data_(hf_api: HfApi, hf_token, text_file):
3883
repo_name = f"repo_txt_data-{int(time.time() * 10e3)}"
39-
create_repo(hf_api, repo_name, token=hf_token, organization=USER, repo_type="dataset", private=True)
40-
repo_id = f"{USER}/{repo_name}"
84+
create_repo(hf_api, repo_name, token=hf_token, organization=CI_HUB_USER, repo_type="dataset", private=True)
85+
repo_id = f"{CI_HUB_USER}/{repo_name}"
4186
hf_api.upload_file(
4287
token=hf_token,
4388
path_or_fileobj=str(text_file),
@@ -47,23 +92,23 @@ def hf_private_dataset_repo_txt_data_(hf_api: HfApi, hf_token, text_file):
4792
)
4893
yield repo_id
4994
try:
50-
delete_repo(hf_api, repo_name, token=hf_token, organization=USER, repo_type="dataset")
95+
delete_repo(hf_api, repo_name, token=hf_token, organization=CI_HUB_USER, repo_type="dataset")
5196
except (requests.exceptions.HTTPError, ValueError): # catch http error and token invalid error
5297
pass
5398

5499

55100
@pytest.fixture()
56101
def hf_private_dataset_repo_txt_data(hf_private_dataset_repo_txt_data_):
57-
with patch("datasets.config.HF_ENDPOINT", ENDPOINT_STAGING):
58-
with patch("datasets.config.HUB_DATASETS_URL", ENDPOINT_STAGING_DATASETS_URL):
102+
with patch("datasets.config.HF_ENDPOINT", CI_HUB_ENDPOINT):
103+
with patch("datasets.config.HUB_DATASETS_URL", CI_HUB_DATASETS_URL):
59104
yield hf_private_dataset_repo_txt_data_
60105

61106

62107
@pytest.fixture(scope="session")
63108
def hf_private_dataset_repo_zipped_txt_data_(hf_api: HfApi, hf_token, zip_csv_with_dir_path):
64109
repo_name = f"repo_zipped_txt_data-{int(time.time() * 10e3)}"
65-
create_repo(hf_api, repo_name, token=hf_token, organization=USER, repo_type="dataset", private=True)
66-
repo_id = f"{USER}/{repo_name}"
110+
create_repo(hf_api, repo_name, token=hf_token, organization=CI_HUB_USER, repo_type="dataset", private=True)
111+
repo_id = f"{CI_HUB_USER}/{repo_name}"
67112
hf_api.upload_file(
68113
token=hf_token,
69114
path_or_fileobj=str(zip_csv_with_dir_path),
@@ -73,23 +118,23 @@ def hf_private_dataset_repo_zipped_txt_data_(hf_api: HfApi, hf_token, zip_csv_wi
73118
)
74119
yield repo_id
75120
try:
76-
delete_repo(hf_api, repo_name, token=hf_token, organization=USER, repo_type="dataset")
121+
delete_repo(hf_api, repo_name, token=hf_token, organization=CI_HUB_USER, repo_type="dataset")
77122
except (requests.exceptions.HTTPError, ValueError): # catch http error and token invalid error
78123
pass
79124

80125

81126
@pytest.fixture()
82127
def hf_private_dataset_repo_zipped_txt_data(hf_private_dataset_repo_zipped_txt_data_):
83-
with patch("datasets.config.HF_ENDPOINT", ENDPOINT_STAGING):
84-
with patch("datasets.config.HUB_DATASETS_URL", ENDPOINT_STAGING_DATASETS_URL):
128+
with patch("datasets.config.HF_ENDPOINT", CI_HUB_ENDPOINT):
129+
with patch("datasets.config.HUB_DATASETS_URL", CI_HUB_DATASETS_URL):
85130
yield hf_private_dataset_repo_zipped_txt_data_
86131

87132

88133
@pytest.fixture(scope="session")
89134
def hf_private_dataset_repo_zipped_img_data_(hf_api: HfApi, hf_token, zip_image_path):
90135
repo_name = f"repo_zipped_img_data-{int(time.time() * 10e3)}"
91-
create_repo(hf_api, repo_name, token=hf_token, organization=USER, repo_type="dataset", private=True)
92-
repo_id = f"{USER}/{repo_name}"
136+
create_repo(hf_api, repo_name, token=hf_token, organization=CI_HUB_USER, repo_type="dataset", private=True)
137+
repo_id = f"{CI_HUB_USER}/{repo_name}"
93138
hf_api.upload_file(
94139
token=hf_token,
95140
path_or_fileobj=str(zip_image_path),
@@ -99,13 +144,13 @@ def hf_private_dataset_repo_zipped_img_data_(hf_api: HfApi, hf_token, zip_image_
99144
)
100145
yield repo_id
101146
try:
102-
delete_repo(hf_api, repo_name, token=hf_token, organization=USER, repo_type="dataset")
147+
delete_repo(hf_api, repo_name, token=hf_token, organization=CI_HUB_USER, repo_type="dataset")
103148
except (requests.exceptions.HTTPError, ValueError): # catch http error and token invalid error
104149
pass
105150

106151

107152
@pytest.fixture()
108153
def hf_private_dataset_repo_zipped_img_data(hf_private_dataset_repo_zipped_img_data_):
109-
with patch("datasets.config.HF_ENDPOINT", ENDPOINT_STAGING):
110-
with patch("datasets.config.HUB_DATASETS_URL", ENDPOINT_STAGING_DATASETS_URL):
154+
with patch("datasets.config.HF_ENDPOINT", CI_HUB_ENDPOINT):
155+
with patch("datasets.config.HUB_DATASETS_URL", CI_HUB_DATASETS_URL):
111156
yield hf_private_dataset_repo_zipped_img_data_

0 commit comments

Comments
 (0)