We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 5a6021a commit a868d35 (Copy full SHA for a868d35)
3 files changed
docs/source/package_reference/main_classes.rst
@@ -32,7 +32,7 @@ The base class :class:`datasets.Dataset` implements a Dataset backed by an Apach
32
info, split, builder_name, citation, config_name, dataset_size,
33
description, download_checksums, download_size, features, homepage,
34
license, size_in_bytes, supervised_keys, version,
35
- from_csv, from_json, from_text,
+ from_csv, from_json, from_text, prepare_for_task,
36
37
.. autofunction:: datasets.concatenate_datasets
38
@@ -54,7 +54,7 @@ It also has dataset transform methods like map or filter, to process all the spl
54
flatten_, cast_, remove_columns_, rename_column_,
55
flatten, cast, remove_columns, rename_column, class_encode_column,
56
save_to_disk, load_from_disk,
57
+ from_csv, from_json, from_text, prepare_for_task
58
59
60
``Features``
src/datasets/arrow_dataset.py
@@ -1391,12 +1391,12 @@ def prepare_for_task(self, task: Union[str, TaskTemplate]) -> "Dataset":
1391
Casts :attr:`datasets.DatasetInfo.features` according to a task-specific schema.
1392
1393
Args:
1394
- task (:obj:`Union[str, TaskTemplate]`): The task to prepare the dataset for during training and evaluation. If `str`, supported tasks include:
+ task (:obj:`Union[str, TaskTemplate]`): The task to prepare the dataset for during training and evaluation. If :obj:`str`, supported tasks include:
1395
1396
- - :obj:`"text-clasification"`
+ - :obj:`"text-classification"`
1397
- :obj:`"question-answering"`
1398
1399
- If `TaskTemplate`, must be one of the task templates in `datasets.tasks`.
+ If :obj:`TaskTemplate`, must be one of the task templates in :obj:`datasets.tasks`.
1400
"""
1401
# TODO(lewtun): Add support for casting nested features like answers.text and answers.answer_start in SQuAD
1402
if isinstance(task, str):
src/datasets/dataset_dict.py
@@ -13,6 +13,7 @@
13
from .features import Features
14
from .filesystems import extract_path_from_uri, is_remote_filesystem
15
from .table import Table
16
+from .tasks import TaskTemplate
17
from .utils.deprecation_utils import deprecated
18
from .utils.typing import PathLike
19
@@ -791,16 +792,18 @@ def from_text(
791
792
path_or_paths, features=features, cache_dir=cache_dir, keep_in_memory=keep_in_memory, **kwargs
793
).read()
794
- def prepare_for_task(self, task: str):
795
+ def prepare_for_task(self, task: Union[str, TaskTemplate]):
796
"""Prepare a dataset for the given task.
797
798
799
800
- task (``str``): The task to prepare the dataset for during training and evaluation. Supported tasks include:
801
802
803
804
805
+
806
807
808
self._check_values_type()
809
return DatasetDict({k: dataset.prepare_for_task(task=task) for k, dataset in self.items()})
0 commit comments