Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/distilabel/steps/generators/huggingface.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -74,6 +74,7 @@ class LoadDataFromHub(GeneratorStep):
- `split`: The split of the dataset to load. Defaults to 'train'.
- `config`: The configuration of the dataset to load. This is optional and only
needed if the dataset has multiple configurations.
- `revision`: The revision of the dataset to load. Defaults to the latest revision.
- `streaming`: Whether to load the dataset in streaming mode or not. Defaults to
`False`.
- `num_examples`: The number of examples to load from the dataset.
Expand Down Expand Up @@ -122,6 +123,10 @@ class LoadDataFromHub(GeneratorStep):
description="The configuration of the dataset to load. This is optional and only"
" needed if the dataset has multiple configurations.",
)
revision: Optional[RuntimeParameter[str]] = Field(
default=None,
description="The revision of the dataset to load. Defaults to the latest revision.",
)
streaming: RuntimeParameter[bool] = Field(
default=False,
description="Whether to load the dataset in streaming mode or not. Defaults to False.",
Expand Down Expand Up @@ -149,6 +154,7 @@ def load(self) -> None:
self.repo_id, # type: ignore
self.config,
split=self.split,
revision=self.revision,
streaming=self.streaming,
)
num_examples = self._get_dataset_num_examples()
Expand Down