Skip to content

Commit b65660b

Browse files
authored
Deprecate task api (#5865)
* Deprecate Task API
* Typo
* Update task_templates.mdx
* Update task_templates.mdx
1 parent aca4cdc commit b65660b

File tree

4 files changed

+28
-3
lines changed

4 files changed

+28
-3
lines changed

docs/source/package_reference/task_templates.mdx

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
# Task templates
22

3+
<Tip warning={true}>
4+
5+
The Task API is deprecated in favor of [`train-eval-index`](https://github.com/huggingface/hub-docs/blob/9ab2555e1c146122056aba6f89af404a8bc9a6f1/datasetcard.md?plain=1#L90-L106) and will be removed in the next major release.
6+
7+
</Tip>
8+
39
The tasks supported by [`Dataset.prepare_for_task`] and [`DatasetDict.prepare_for_task`].
410

511
[[autodoc]] datasets.tasks.AutomaticSpeechRecognition

src/datasets/arrow_dataset.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@
109109
)
110110
from .tasks import TaskTemplate
111111
from .utils import logging
112+
from .utils.deprecation_utils import deprecated
112113
from .utils.file_utils import _retry, cached_path, estimate_dataset_size
113114
from .utils.hub import hf_hub_url
114115
from .utils.info_utils import is_small_dataset
@@ -2706,6 +2707,7 @@ def with_transform(
27062707
dataset.set_transform(transform=transform, columns=columns, output_all_columns=output_all_columns)
27072708
return dataset
27082709

2710+
@deprecated()
27092711
def prepare_for_task(self, task: Union[str, TaskTemplate], id: int = 0) -> "Dataset":
27102712
"""
27112713
Prepare a dataset for the given task by casting the dataset's [`Features`] to standardized column names and types as detailed in [`datasets.tasks`](./task_templates).

src/datasets/dataset_dict.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from .table import Table
2626
from .tasks import TaskTemplate
2727
from .utils import logging
28+
from .utils.deprecation_utils import deprecated
2829
from .utils.doc_utils import is_documented_by
2930
from .utils.file_utils import cached_path
3031
from .utils.hub import hf_hub_url
@@ -1537,6 +1538,7 @@ def from_text(
15371538
path_or_paths, features=features, cache_dir=cache_dir, keep_in_memory=keep_in_memory, **kwargs
15381539
).read()
15391540

1541+
@deprecated()
15401542
@is_documented_by(Dataset.prepare_for_task)
15411543
def prepare_for_task(self, task: Union[str, TaskTemplate], id: int = 0) -> "DatasetDict":
15421544
self._check_values_type()

src/datasets/load.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,6 @@
6363
_hash_python_lines,
6464
)
6565
from .splits import Split
66-
from .tasks import TaskTemplate
6766
from .utils.deprecation_utils import deprecated
6867
from .utils.file_utils import (
6968
OfflineModeIsEnabled,
@@ -1586,7 +1585,7 @@ def load_dataset(
15861585
revision: Optional[Union[str, Version]] = None,
15871586
token: Optional[Union[bool, str]] = None,
15881587
use_auth_token="deprecated",
1589-
task: Optional[Union[str, TaskTemplate]] = None,
1588+
task="deprecated",
15901589
streaming: bool = False,
15911590
num_proc: Optional[int] = None,
15921591
storage_options: Optional[Dict] = None,
@@ -1708,6 +1707,12 @@ def load_dataset(
17081707
</Deprecated>
17091708
task (`str`):
17101709
The task to prepare the dataset for during training and evaluation. Casts the dataset's [`Features`] to standardized column names and types as detailed in `datasets.tasks`.
1710+
1711+
<Deprecated version="2.13.0">
1712+
1713+
`task` was deprecated in version 2.13.0 and will be removed in 3.0.0.
1714+
1715+
</Deprecated>
17111716
streaming (`bool`, defaults to `False`):
17121717
If set to `True`, don't download the data files. Instead, it streams the data progressively while
17131718
iterating on the dataset. An [`IterableDataset`] or [`IterableDatasetDict`] is returned instead in this case.
@@ -1795,6 +1800,13 @@ def load_dataset(
17951800
f"You can remove this warning by passing 'verification_mode={verification_mode.value}' instead.",
17961801
FutureWarning,
17971802
)
1803+
if task != "deprecated":
1804+
warnings.warn(
1805+
"'task' was deprecated in version 2.13.0 and will be removed in 3.0.0.\n",
1806+
FutureWarning,
1807+
)
1808+
else:
1809+
task = None
17981810
if data_files is not None and not data_files:
17991811
raise ValueError(f"Empty 'data_files': '{data_files}'. It should be either non-empty or None (default).")
18001812
if Path(path, config.DATASET_STATE_JSON_FILENAME).exists():
@@ -1855,7 +1867,10 @@ def load_dataset(
18551867
ds = builder_instance.as_dataset(split=split, verification_mode=verification_mode, in_memory=keep_in_memory)
18561868
# Rename and cast features to match task schema
18571869
if task is not None:
1858-
ds = ds.prepare_for_task(task)
1870+
# To avoid issuing the same warning twice
1871+
with warnings.catch_warnings():
1872+
warnings.simplefilter("ignore", FutureWarning)
1873+
ds = ds.prepare_for_task(task)
18591874
if save_infos:
18601875
builder_instance._save_infos()
18611876

0 commit comments

Comments
 (0)