Skip to content

Commit aa555a2

Browse files
authored
Properly raise FileNotFound even if the dataset is private (#4536)
* don't use token when not specified, and properly raise filenotfound
* re-enable the tests on windows
* revert unwanted change
* no double backticks
1 parent 8910eda commit aa555a2

File tree

4 files changed, with 8 additions and 11 deletions.

src/datasets/arrow_dataset.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4073,7 +4073,7 @@ def _push_parquet_shards_to_hub(
40734073

40744074
if token is None:
40754075
raise OSError(
4076-
"You need to provide a `token` or be logged in to Hugging Face with " "`huggingface-cli login`."
4076+
"You need to provide a `token` or be logged in to Hugging Face with `huggingface-cli login`."
40774077
)
40784078

40794079
if split is None:

src/datasets/load.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -783,7 +783,7 @@ def get_module(self) -> DatasetModule:
783783
hfh_dataset_info = HfApi(config.HF_ENDPOINT).dataset_info(
784784
self.name,
785785
revision=self.revision,
786-
token=token,
786+
token=token if token else "no-token",
787787
timeout=100.0,
788788
)
789789
patterns = (
@@ -1140,7 +1140,7 @@ def dataset_module_factory(
11401140
dataset_info = hf_api.dataset_info(
11411141
repo_id=path,
11421142
revision=revision,
1143-
token=token,
1143+
token=token if token else "no-token",
11441144
timeout=100.0,
11451145
)
11461146
except Exception as e: # noqa: catch any exception of hf_hub and consider that the dataset doesn't exist
@@ -1159,7 +1159,10 @@ def dataset_module_factory(
11591159
elif "401" in str(e):
11601160
msg = f"Dataset '{path}' doesn't exist on the Hub"
11611161
msg = msg + f" at revision '{revision}'" if revision else msg
1162-
raise FileNotFoundError(msg + ". If the repo is private, make sure you are authenticated.")
1162+
raise FileNotFoundError(
1163+
msg
1164+
+ ". If the repo is private, make sure you are authenticated with `use_auth_token=True` after logging in with `huggingface-cli login`."
1165+
)
11631166
else:
11641167
raise e
11651168
if filename in [sibling.rfilename for sibling in dataset_info.siblings]:

src/datasets/utils/file_utils.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -509,7 +509,7 @@ def get_from_cache(
509509
logger.info(f"Couldn't get ETag version for url {url}")
510510
elif response.status_code == 401 and config.HF_ENDPOINT in url and use_auth_token is None:
511511
raise ConnectionError(
512-
f"Unauthorized for URL {url}. Please use the parameter ``use_auth_token=True`` after logging in with ``huggingface-cli login``"
512+
f"Unauthorized for URL {url}. Please use the parameter `use_auth_token=True` after logging in with `huggingface-cli login`"
513513
)
514514
except (OSError, requests.exceptions.Timeout) as e:
515515
# not connected

tests/test_load.py

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -781,19 +781,13 @@ def assert_auth(url, *args, headers, **kwargs):
781781
mock_head.assert_called()
782782

783783

784-
@pytest.mark.skipif(
785-
os.name == "nt", reason="skip on windows because of SSL issues with moon-staging.huggingface.co:443"
786-
)
787784
def test_load_streaming_private_dataset(hf_token, hf_private_dataset_repo_txt_data):
788785
with pytest.raises(FileNotFoundError):
789786
load_dataset(hf_private_dataset_repo_txt_data, streaming=True)
790787
ds = load_dataset(hf_private_dataset_repo_txt_data, streaming=True, use_auth_token=hf_token)
791788
assert next(iter(ds)) is not None
792789

793790

794-
@pytest.mark.skipif(
795-
os.name == "nt", reason="skip on windows because of SSL issues with moon-staging.huggingface.co:443"
796-
)
797791
def test_load_streaming_private_dataset_with_zipped_data(hf_token, hf_private_dataset_repo_zipped_txt_data):
798792
with pytest.raises(FileNotFoundError):
799793
load_dataset(hf_private_dataset_repo_zipped_txt_data, streaming=True)

0 commit comments

Comments
 (0)