6 changes: 3 additions & 3 deletions docs/source/upload_dataset.mdx
@@ -113,16 +113,16 @@ To set your dataset as private, set the `private` parameter to `True`. This para

A private dataset is only accessible by you. Similarly, if you share a dataset within your organization, then members of the organization can also access the dataset.

-Load a private dataset by providing your authentication token to the `use_auth_token` parameter:
+Load a private dataset by providing your authentication token to the `token` parameter:

```py
>>> from datasets import load_dataset

# Load a private individual dataset
>>> dataset = load_dataset("stevhliu/demo", use_auth_token=True)
>>> dataset = load_dataset("stevhliu/demo", token=True)

# Load a private organization dataset
>>> dataset = load_dataset("organization/dataset_name", use_auth_token=True)
>>> dataset = load_dataset("organization/dataset_name", token=True)
```

## What's next?
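To try the rename end to end, here is a minimal sketch using the same repo id as the docs above. It assumes an install from this branch and a token saved by `huggingface-cli login`; the `FutureWarning` check also assumes `load_dataset` forwards `use_auth_token` through the same deprecation shim added to `DatasetBuilder` below, which this diff does not show.

```py
import warnings

from datasets import load_dataset

# New spelling: `token=True` reads the locally saved token.
dataset = load_dataset("stevhliu/demo", token=True)

# Old spelling: expected to keep working until 3.0.0, but to warn.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    dataset = load_dataset("stevhliu/demo", use_auth_token=True)
assert any(issubclass(w.category, FutureWarning) for w in caught)
```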
8 changes: 3 additions & 5 deletions src/datasets/arrow_dataset.py
@@ -5316,9 +5316,7 @@ def path_in_repo(_index, shard):
for data_file in data_files
if data_file.startswith(f"data/{split}-") and data_file not in shards_path_in_repo
]
-deleted_size = sum(
-    xgetsize(hf_hub_url(repo_id, data_file), use_auth_token=token) for data_file in data_files_to_delete
-)
+deleted_size = sum(xgetsize(hf_hub_url(repo_id, data_file), token=token) for data_file in data_files_to_delete)

def delete_file(file):
api.delete_file(file, repo_id=repo_id, token=token, repo_type="dataset", revision=branch)
@@ -5420,7 +5418,7 @@ def push_to_hub(
if "README.md" in repo_files:
download_config = DownloadConfig()
download_config.download_desc = "Downloading metadata"
-download_config.use_auth_token = token
+download_config.token = token
dataset_readme_path = cached_path(
hf_hub_url(repo_id, "README.md"),
download_config=download_config,
@@ -5438,7 +5436,7 @@
dataset_card_data = DatasetCardData()
download_config = DownloadConfig()
download_config.download_desc = "Downloading metadata"
-download_config.use_auth_token = token
+download_config.token = token
dataset_infos_path = cached_path(
hf_hub_url(repo_id, config.DATASETDICT_INFOS_FILENAME),
download_config=download_config,
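The two `download_config` assignments above sit on the `push_to_hub` path that refreshes an existing dataset card. A quick way to exercise them is to push twice, so the second call finds `README.md` in the repo and downloads its metadata with the token. A rough sketch (the repo id is hypothetical; the token string is a placeholder, or omit `token` to use the cached login):

```py
from datasets import Dataset

TOKEN = "hf_xxx"  # hypothetical placeholder
ds = Dataset.from_dict({"text": ["a", "b"]})

# First push creates the private repo and its README.
ds.push_to_hub("my-username/private-demo", private=True, token=TOKEN)

# Second push hits the `"README.md" in repo_files` branch above and
# downloads the existing metadata via `download_config.token`.
ds.push_to_hub("my-username/private-demo", token=TOKEN)
```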
31 changes: 21 additions & 10 deletions src/datasets/builder.py
@@ -253,7 +253,7 @@ class DatasetBuilder:
features ([`Features`], *optional*):
Features types to use with this dataset.
It can be used to change the [`Features`] types of a dataset, for example.
-use_auth_token (`str` or `bool`, *optional*):
+token (`str` or `bool`, *optional*):
String or boolean to use as Bearer token for remote files on the
Datasets Hub. If `True`, will get token from `"~/.huggingface"`.
repo_id (`str`, *optional*):
@@ -316,7 +316,8 @@ def __init__(
base_path: Optional[str] = None,
info: Optional[DatasetInfo] = None,
features: Optional[Features] = None,
-use_auth_token: Optional[Union[bool, str]] = None,
+token: Optional[Union[bool, str]] = None,
+use_auth_token="deprecated",
repo_id: Optional[str] = None,
data_files: Optional[Union[str, list, dict, DataFilesDict]] = None,
data_dir: Optional[str] = None,
@@ -325,6 +326,13 @@
name="deprecated",
**config_kwargs,
):
if use_auth_token != "deprecated":
warnings.warn(
"'use_auth_token' was deprecated in favor of 'token' in version 2.14.0 and will be removed in 3.0.0.\n"
f"You can remove this warning by passing 'token={use_auth_token}' instead.",
FutureWarning,
)
token = use_auth_token
if name != "deprecated":
warnings.warn(
"Parameter 'name' was renamed to 'config_name' in version 2.3.0 and will be removed in 3.0.0.",
@@ -335,14 +343,16 @@
self.name: str = camelcase_to_snakecase(self.__module__.split(".")[-1])
self.hash: Optional[str] = hash
self.base_path = base_path
-self.use_auth_token = use_auth_token
+self.token = token
+# For backwards compatibility (e.g. if accessed in a dataset script)
+self.use_auth_token = token
self.repo_id = repo_id
self.storage_options = storage_options
self._writer_batch_size = writer_batch_size or self.DEFAULT_WRITER_BATCH_SIZE

if data_files is not None and not isinstance(data_files, DataFilesDict):
data_files = DataFilesDict.from_local_or_remote(
-sanitize_patterns(data_files), base_path=base_path, use_auth_token=use_auth_token
+sanitize_patterns(data_files), base_path=base_path, token=token
)

# Prepare config: DatasetConfig contains name, version and description but can be extended by each dataset
@@ -699,7 +709,7 @@ def download_and_prepare(

<Deprecated version="2.7.1">

-Pass `use_auth_token` to the initializer/`load_dataset_builder` instead.
+Pass `use_auth_token` to `load_dataset_builder` instead.

</Deprecated>
file_format (`str`, *optional*):
@@ -761,11 +771,12 @@
)
if use_auth_token != "deprecated":
warnings.warn(
"'use_auth_token' was deprecated in version 2.7.1 and will be removed in 3.0.0. Pass `use_auth_token` to the initializer/`load_dataset_builder` instead.",
"'use_auth_token' was deprecated in version 2.7.1 and will be removed in 3.0.0. Pass `token` to `load_dataset_builder` instead.",
FutureWarning,
)
+token = use_auth_token
else:
-    use_auth_token = self.use_auth_token
+    token = self.token

output_dir = output_dir if output_dir is not None else self._cache_dir
# output_dir can be a remote bucket on GCS or S3 (when using BeamBasedBuilder for distributed data processing)
@@ -799,7 +810,7 @@
force_extract=download_mode == DownloadMode.FORCE_REDOWNLOAD,
use_etag=False,
num_proc=num_proc,
-use_auth_token=use_auth_token,
+token=token,
storage_options=self.storage_options,
) # We don't use etag for data files to speed up the process

@@ -1273,7 +1284,7 @@ def as_streaming_dataset(

dl_manager = StreamingDownloadManager(
base_path=base_path or self.base_path,
-download_config=DownloadConfig(use_auth_token=self.use_auth_token, storage_options=self.storage_options),
+download_config=DownloadConfig(token=self.token, storage_options=self.storage_options),
dataset_name=self.name,
data_dir=self.config.data_dir,
)
@@ -1303,7 +1314,7 @@ def _as_streaming_dataset_single(
) -> IterableDataset:
ex_iterable = self._get_examples_iterable_for_split(splits_generator)
# add auth to be able to access and decode audio/image files from private repositories.
-token_per_repo_id = {self.repo_id: self.use_auth_token} if self.repo_id else {}
+token_per_repo_id = {self.repo_id: self.token} if self.repo_id else {}
return IterableDataset(
ex_iterable, info=self.info, split=splits_generator.name, token_per_repo_id=token_per_repo_id
)
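The sentinel-based forwarding added to `DatasetBuilder.__init__` is worth spelling out, since the same shape recurs in the files below. A standalone sketch (names are illustrative, not part of `datasets`): the default is the string `"deprecated"` rather than `None`, so even an explicit `use_auth_token=None` gets forwarded to `token`.

```py
import warnings
from typing import Optional, Union


def fetch(token: Optional[Union[bool, str]] = None, use_auth_token="deprecated"):
    # Mirror of the shim above: forward the old keyword, then warn.
    if use_auth_token != "deprecated":
        warnings.warn(
            "'use_auth_token' was deprecated in favor of 'token'.\n"
            f"You can remove this warning by passing 'token={use_auth_token}' instead.",
            FutureWarning,
        )
        token = use_auth_token
    return token


assert fetch(token="abc") == "abc"  # new spelling, no warning
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    assert fetch(use_auth_token="abc") == "abc"  # old spelling still works
assert caught and issubclass(caught[0].category, FutureWarning)
```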
16 changes: 8 additions & 8 deletions src/datasets/data_files.py
@@ -679,21 +679,21 @@ def get_metadata_patterns_in_dataset_repository(


def _get_single_origin_metadata_locally_or_by_urls(
-    data_file: Union[Path, Url], use_auth_token: Optional[Union[bool, str]] = None
+    data_file: Union[Path, Url], token: Optional[Union[bool, str]] = None
) -> Tuple[str]:
if isinstance(data_file, Url):
data_file = str(data_file)
-return (request_etag(data_file, use_auth_token=use_auth_token),)
+return (request_etag(data_file, token=token),)
else:
data_file = str(data_file.resolve())
return (str(os.path.getmtime(data_file)),)


def _get_origin_metadata_locally_or_by_urls(
-    data_files: List[Union[Path, Url]], max_workers=64, use_auth_token: Optional[Union[bool, str]] = None
+    data_files: List[Union[Path, Url]], max_workers=64, token: Optional[Union[bool, str]] = None
) -> Tuple[str]:
return thread_map(
-    partial(_get_single_origin_metadata_locally_or_by_urls, use_auth_token=use_auth_token),
+    partial(_get_single_origin_metadata_locally_or_by_urls, token=token),
data_files,
max_workers=max_workers,
tqdm_class=logging.tqdm,
@@ -742,11 +742,11 @@ def from_local_or_remote(
patterns: List[str],
base_path: Optional[str] = None,
allowed_extensions: Optional[List[str]] = None,
-use_auth_token: Optional[Union[bool, str]] = None,
+token: Optional[Union[bool, str]] = None,
) -> "DataFilesList":
base_path = base_path if base_path is not None else str(Path().resolve())
data_files = resolve_patterns_locally_or_by_urls(base_path, patterns, allowed_extensions)
-origin_metadata = _get_origin_metadata_locally_or_by_urls(data_files, use_auth_token=use_auth_token)
+origin_metadata = _get_origin_metadata_locally_or_by_urls(data_files, token=token)
return cls(data_files, origin_metadata)

def filter_extensions(self, extensions: List[str]) -> "DataFilesList":
@@ -784,7 +784,7 @@ def from_local_or_remote(
patterns: Dict[str, Union[List[str], DataFilesList]],
base_path: Optional[str] = None,
allowed_extensions: Optional[List[str]] = None,
-use_auth_token: Optional[Union[bool, str]] = None,
+token: Optional[Union[bool, str]] = None,
) -> "DataFilesDict":
out = cls()
for key, patterns_for_key in patterns.items():
@@ -793,7 +793,7 @@
patterns_for_key,
base_path=base_path,
allowed_extensions=allowed_extensions,
-use_auth_token=use_auth_token,
+token=token,
)
if not isinstance(patterns_for_key, DataFilesList)
else patterns_for_key
2 changes: 1 addition & 1 deletion src/datasets/dataset_dict.py
@@ -1669,7 +1669,7 @@ def push_to_hub(
if "README.md" in repo_files:
download_config = DownloadConfig()
download_config.download_desc = "Downloading metadata"
-download_config.use_auth_token = token
+download_config.token = token
dataset_readme_path = cached_path(
hf_hub_url(repo_id, "README.md"),
download_config=download_config,
23 changes: 22 additions & 1 deletion src/datasets/download/download_config.py
@@ -1,4 +1,5 @@
import copy
+import warnings
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Optional, Union
@@ -36,9 +37,19 @@ class DownloadConfig:
The number of processes to launch to download the files in parallel.
max_retries (`int`, default to `1`):
The number of times to retry an HTTP request if it fails.
+token (`str` or `bool`, *optional*):
+    Optional string or boolean to use as Bearer token
+    for remote files on the Datasets Hub. If `True`, or not specified, will get token from `~/.huggingface`.
use_auth_token (`str` or `bool`, *optional*):
Optional string or boolean to use as Bearer token
for remote files on the Datasets Hub. If `True`, or not specified, will get token from `~/.huggingface`.

+<Deprecated version="2.14.0">
+
+`use_auth_token` was deprecated in favor of `token` in version 2.14.0 and will be removed in 3.0.0.
+
+</Deprecated>

ignore_url_params (`bool`, defaults to `False`):
Whether to strip all query parameters and fragments from
the download URL before using it for caching the file.
@@ -60,10 +71,20 @@ class DownloadConfig:
use_etag: bool = True
num_proc: Optional[int] = None
max_retries: int = 1
-use_auth_token: Optional[Union[str, bool]] = None
+token: Optional[Union[str, bool]] = None
+use_auth_token = "deprecated"
ignore_url_params: bool = False
storage_options: Optional[Dict] = None
download_desc: Optional[str] = None

+def __post_init__(self):
+    if self.use_auth_token != "deprecated":
+        warnings.warn(
+            "'use_auth_token' was deprecated in favor of 'token' in version 2.14.0 and will be removed in 3.0.0.\n"
+            f"You can remove this warning by passing 'token={self.use_auth_token}' instead.",
+            FutureWarning,
+        )
+        self.token = self.use_auth_token

def copy(self) -> "DownloadConfig":
return self.__class__(**{k: copy.deepcopy(v) for k, v in self.__dict__.items()})
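Callers migrate by populating the new field, as `arrow_dataset.py` and `dataset_dict.py` do above. A small usage sketch (values are illustrative):

```py
from datasets import DownloadConfig

# New spelling: `token` is a regular dataclass field.
download_config = DownloadConfig(token=True)

# Attribute-style assignment, matching the `push_to_hub` call sites above.
download_config = DownloadConfig()
download_config.download_desc = "Downloading metadata"
download_config.token = "hf_xxx"  # hypothetical token string
```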