Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions src/datasets/dataset_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import fsspec
import numpy as np

from datasets.splits import NamedSplit, Split
from datasets.utils.doc_utils import is_documented_by

from .arrow_dataset import Dataset
Expand All @@ -30,6 +31,20 @@ def _check_values_type(self):
"Values in `DatasetDict` should of type `Dataset` but got type '{}'".format(type(dataset))
)

def __getitem__(self, k) -> Dataset:
    """Look up a split by name, with a helpful error for non-split keys.

    Keys that are ``str`` or ``NamedSplit`` instances are forwarded to the
    parent class lookup unchanged. The same happens for any key when this
    mapping is empty, so the parent class decides how to report the miss.

    Any other key on a non-empty mapping (for example an integer index
    meant for a row) is rejected with a ``KeyError`` whose message shows
    how to select a split first.

    Args:
        k: The key to look up.

    Returns:
        Whatever the parent class ``__getitem__`` returns for ``k``.

    Raises:
        KeyError: If ``k`` is neither a ``str`` nor a ``NamedSplit`` and the
            mapping is not empty.
    """
    if isinstance(k, (str, NamedSplit)) or len(self) == 0:
        return super().__getitem__(k)

    # Pick the split to show in the example: the first of train / test /
    # validation that is present, otherwise the first key of the mapping
    # (guaranteed to exist because the mapping is non-empty here).
    for candidate in (Split.TRAIN, Split.TEST, Split.VALIDATION):
        if candidate in self:
            suggested_split = str(candidate)
            break
    else:
        suggested_split = next(iter(self))

    message = (
        f"Invalid key: {k}. Please first select a split. For example: "
        f"`my_dataset_dictionary['{suggested_split}'][{k}]`. "
        f"Available splits: {sorted(self)}"
    )
    raise KeyError(message)

@property
def data(self) -> Dict[str, Table]:
"""The Apache Arrow tables backing each split."""
Expand Down