|
37 | 37 | import yaml |
38 | 38 | from fsspec.core import url_to_fs |
39 | 39 | from huggingface_hub import DatasetCard, DatasetCardData, HfApi, HfFileSystem |
| 40 | +from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError, RevisionNotFoundError |
40 | 41 |
|
41 | 42 | from . import config |
42 | 43 | from .arrow_dataset import Dataset |
@@ -1836,28 +1837,26 @@ def dataset_module_factory( |
1836 | 1837 | token=download_config.token, |
1837 | 1838 | timeout=100.0, |
1838 | 1839 | ) |
1839 | | - except Exception as e: # noqa catch any exception of hf_hub and consider that the dataset doesn't exist |
1840 | | - if isinstance( |
1841 | | - e, |
1842 | | - ( |
1843 | | - OfflineModeIsEnabled, |
1844 | | - requests.exceptions.ConnectTimeout, |
1845 | | - requests.exceptions.ConnectionError, |
1846 | | - ), |
1847 | | - ): |
1848 | | - raise ConnectionError(f"Couldn't reach '{path}' on the Hub ({type(e).__name__})") |
1849 | | - elif "404" in str(e): |
1850 | | - msg = f"Dataset '{path}' doesn't exist on the Hub or cannot be accessed" |
1851 | | - raise DatasetNotFoundError(msg + f" at revision '{revision}'" if revision else msg) |
1852 | | - elif "401" in str(e): |
1853 | | - msg = f"Dataset '{path}' doesn't exist on the Hub or cannot be accessed" |
1854 | | - msg = msg + f" at revision '{revision}'" if revision else msg |
1855 | | - raise DatasetNotFoundError( |
1856 | | - msg |
1857 | | - + f". If the dataset is private or gated, make sure to log in with `huggingface-cli login` or visit the dataset page at https://huggingface.co/datasets/{path} to ask for access." |
1858 | | - ) |
1859 | | - else: |
1860 | | - raise e |
| 1840 | + except ( |
| 1841 | + OfflineModeIsEnabled, |
| 1842 | + requests.exceptions.ConnectTimeout, |
| 1843 | + requests.exceptions.ConnectionError, |
| 1844 | + ) as e: |
| 1845 | + raise ConnectionError(f"Couldn't reach '{path}' on the Hub ({e.__class__.__name__})") from e |
| 1846 | + except GatedRepoError as e: |
| 1847 | + message = f"Dataset '{path}' is a gated dataset on the Hub." |
| 1848 | + if "401 Client Error" in str(e): |
| 1849 | + message += " You must be authenticated to access it." |
| 1850 | + elif "403 Client Error" in str(e): |
| 1851 | + message += f" Visit the dataset page at https://huggingface.co/datasets/{path} to ask for access." |
| 1852 | + raise DatasetNotFoundError(message) from e |
| 1853 | + except RevisionNotFoundError as e: |
| 1854 | + raise DatasetNotFoundError( |
| 1855 | + f"Revision '{revision}' doesn't exist for dataset '{path}' on the Hub." |
| 1856 | + ) from e |
| 1857 | + except RepositoryNotFoundError as e: |
| 1858 | + raise DatasetNotFoundError(f"Dataset '{path}' doesn't exist on the Hub or cannot be accessed.") from e |
| 1859 | + |
1861 | 1860 | if filename in [sibling.rfilename for sibling in dataset_info.siblings]: # contains a dataset script |
1862 | 1861 | fs = HfFileSystem(endpoint=config.HF_ENDPOINT, token=download_config.token) |
1863 | 1862 | if _require_custom_configs or (revision and revision != "main"): |
|
0 commit comments