Skip to content

Commit 5e7739d

Browse files
mariosasko and lhoestq authored
Datasets cli improvements (#2315)
* Use env command in bug report
* Remove download command
* Add command descriptions
* Update bug report
* Remove less important env fields
* Style
* Remove less important fields from bug report
* Replace sys.version with platform.platform, add pyarrow version
* Update bug-report.md

Co-authored-by: Quentin Lhoest <[email protected]>
1 parent b589aee commit 5e7739d

9 files changed

Lines changed: 33 additions & 95 deletions

File tree

.github/ISSUE_TEMPLATE/bug-report.md

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -21,16 +21,9 @@ A clear and concise description of the expected results.
2121
## Actual results
2222
Specify the actual results or traceback.
2323

24-
## Versions
25-
Paste the output of the following code:
26-
```python
27-
import datasets
28-
import sys
29-
import platform
30-
31-
print(f"""
32-
- Datasets: {datasets.__version__}
33-
- Python: {sys.version}
34-
- Platform: {platform.platform()}
35-
""")
36-
```
24+
## Environment info
25+
<!-- You can run the command `datasets-cli env` and copy-and-paste its output below. -->
26+
- `datasets` version:
27+
- Platform:
28+
- Python version:
29+
- PyArrow version:

src/datasets/commands/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from argparse import ArgumentParser
33

44

5-
class BaseTransformersCLICommand(ABC):
5+
class BaseDatasetsCLICommand(ABC):
66
@staticmethod
77
@abstractmethod
88
def register_subcommand(parser: ArgumentParser):

src/datasets/commands/convert.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import shutil
44
from argparse import ArgumentParser, Namespace
55

6-
from datasets.commands import BaseTransformersCLICommand
6+
from datasets.commands import BaseDatasetsCLICommand
77
from datasets.utils.logging import get_logger
88

99

@@ -42,22 +42,24 @@
4242
def convert_command_factory(args: Namespace):
4343
"""
4444
Factory function used to convert a model TF 1.0 checkpoint in a PyTorch checkpoint.
45-
:return: ServeCommand
45+
46+
Returns: ConvertCommand
4647
"""
4748
return ConvertCommand(args.tfds_path, args.datasets_directory)
4849

4950

50-
class ConvertCommand(BaseTransformersCLICommand):
51+
class ConvertCommand(BaseDatasetsCLICommand):
5152
@staticmethod
5253
def register_subcommand(parser: ArgumentParser):
5354
"""
54-
Register this command to argparse so it's available for the transformer-cli
55-
:param parser: Root parser to register command-specific arguments
56-
:return:
55+
Register this command to argparse so it's available for the datasets-cli
56+
57+
Args:
58+
parser: Root parser to register command-specific arguments
5759
"""
5860
train_parser = parser.add_parser(
5961
"convert",
60-
help="CLI tool to convert a (nlp) TensorFlow-Dataset in a HuggingFace-NLP dataset.",
62+
help="Convert a TensorFlow Datasets dataset to a HuggingFace Datasets dataset.",
6163
)
6264
train_parser.add_argument(
6365
"--tfds_path",
@@ -66,7 +68,7 @@ def register_subcommand(parser: ArgumentParser):
6668
help="Path to a TensorFlow Datasets folder to convert or a single tfds file to convert.",
6769
)
6870
train_parser.add_argument(
69-
"--datasets_directory", type=str, required=True, help="Path to the HuggingFace NLP folder."
71+
"--datasets_directory", type=str, required=True, help="Path to the HuggingFace Datasets folder."
7072
)
7173
train_parser.set_defaults(func=convert_command_factory)
7274

src/datasets/commands/datasets_cli.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
from argparse import ArgumentParser
33

44
from datasets.commands.convert import ConvertCommand
5-
from datasets.commands.download import DownloadCommand
65
from datasets.commands.dummy_data import DummyDataCommand
76
from datasets.commands.env import EnvironmentCommand
87
from datasets.commands.run_beam import RunBeamCommand
@@ -17,7 +16,6 @@ def main():
1716

1817
# Register commands
1918
ConvertCommand.register_subcommand(commands_parser)
20-
DownloadCommand.register_subcommand(commands_parser)
2119
EnvironmentCommand.register_subcommand(commands_parser)
2220
TestCommand.register_subcommand(commands_parser)
2321
RunBeamCommand.register_subcommand(commands_parser)

src/datasets/commands/download.py

Lines changed: 0 additions & 32 deletions
This file was deleted.

src/datasets/commands/dummy_data.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from typing import Optional
1010

1111
from datasets import config
12-
from datasets.commands import BaseTransformersCLICommand
12+
from datasets.commands import BaseDatasetsCLICommand
1313
from datasets.load import import_main_class, prepare_module
1414
from datasets.utils import MockDownloadManager
1515
from datasets.utils.download_manager import DownloadManager
@@ -212,10 +212,10 @@ def compress_autogenerated_dummy_data(self, path_to_dataset):
212212
shutil.rmtree(base_name)
213213

214214

215-
class DummyDataCommand(BaseTransformersCLICommand):
215+
class DummyDataCommand(BaseDatasetsCLICommand):
216216
@staticmethod
217217
def register_subcommand(parser: ArgumentParser):
218-
test_parser = parser.add_parser("dummy_data")
218+
test_parser = parser.add_parser("dummy_data", help="Generate dummy data.")
219219
test_parser.add_argument("--auto_generate", action="store_true", help="Automatically generate dummy data")
220220
test_parser.add_argument(
221221
"--n_lines", type=int, default=5, help="Number of lines or samples to keep when auto-generating dummy data"

src/datasets/commands/env.py

Lines changed: 7 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,54 +1,31 @@
11
import platform
22
from argparse import ArgumentParser
33

4+
import pyarrow
5+
46
from datasets import __version__ as version
5-
from datasets import config
6-
from datasets.commands import BaseTransformersCLICommand
7+
from datasets.commands import BaseDatasetsCLICommand
78

89

910
def info_command_factory(_):
1011
return EnvironmentCommand()
1112

1213

13-
class EnvironmentCommand(BaseTransformersCLICommand):
14+
class EnvironmentCommand(BaseDatasetsCLICommand):
1415
@staticmethod
1516
def register_subcommand(parser: ArgumentParser):
16-
download_parser = parser.add_parser("env")
17+
download_parser = parser.add_parser("env", help="Print relevant system environment info.")
1718
download_parser.set_defaults(func=info_command_factory)
1819

1920
def run(self):
20-
pt_version = "not installed"
21-
pt_cuda_available = "NA"
22-
if config.TORCH_AVAILABLE:
23-
import torch
24-
25-
pt_version = torch.__version__
26-
pt_cuda_available = torch.cuda.is_available()
27-
28-
tf_version = "not installed"
29-
tf_cuda_available = "NA"
30-
if config.TF_AVAILABLE:
31-
import tensorflow as tf
32-
33-
tf_version = tf.__version__
34-
try:
35-
# deprecated in v2.1
36-
tf_cuda_available = tf.test.is_gpu_available()
37-
except AttributeError:
38-
# returns list of devices, convert to bool
39-
tf_cuda_available = bool(tf.config.list_physical_devices("GPU"))
40-
4121
info = {
4222
"`datasets` version": version,
4323
"Platform": platform.platform(),
4424
"Python version": platform.python_version(),
45-
"PyTorch version (GPU?)": "{} ({})".format(pt_version, pt_cuda_available),
46-
"Tensorflow version (GPU?)": "{} ({})".format(tf_version, tf_cuda_available),
47-
"Using GPU in script?": "<fill in>",
48-
"Using distributed or parallel set-up in script?": "<fill in>",
25+
"PyArrow version": pyarrow.__version__,
4926
}
5027

51-
print("\nCopy-and-paste the text below in your GitHub issue and FILL OUT the two last points.\n")
28+
print("\nCopy-and-paste the text below in your GitHub issue.\n")
5229
print(self.format_dict(info))
5330

5431
return info

src/datasets/commands/run_beam.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
from datasets import config
88
from datasets.builder import DatasetBuilder
9-
from datasets.commands import BaseTransformersCLICommand
9+
from datasets.commands import BaseDatasetsCLICommand
1010
from datasets.load import import_main_class, prepare_module
1111
from datasets.utils.download_manager import DownloadConfig, GenerateMode
1212

@@ -25,10 +25,10 @@ def run_beam_command_factory(args):
2525
)
2626

2727

28-
class RunBeamCommand(BaseTransformersCLICommand):
28+
class RunBeamCommand(BaseDatasetsCLICommand):
2929
@staticmethod
3030
def register_subcommand(parser: ArgumentParser):
31-
run_beam_parser = parser.add_parser("run_beam")
31+
run_beam_parser = parser.add_parser("run_beam", help="Run a Beam dataset processing pipeline.")
3232
run_beam_parser.add_argument("--name", type=str, default=None, help="Dataset processing name")
3333
run_beam_parser.add_argument(
3434
"--cache_dir",

src/datasets/commands/test.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
import datasets.config
88
from datasets.builder import DatasetBuilder
9-
from datasets.commands import BaseTransformersCLICommand
9+
from datasets.commands import BaseDatasetsCLICommand
1010
from datasets.load import import_main_class, prepare_module
1111
from datasets.utils.download_manager import GenerateMode
1212
from datasets.utils.filelock import logger as fl_logger
@@ -32,10 +32,10 @@ def test_command_factory(args):
3232
)
3333

3434

35-
class TestCommand(BaseTransformersCLICommand):
35+
class TestCommand(BaseDatasetsCLICommand):
3636
@staticmethod
3737
def register_subcommand(parser: ArgumentParser):
38-
test_parser = parser.add_parser("test")
38+
test_parser = parser.add_parser("test", help="Test dataset implementation.")
3939
test_parser.add_argument("--name", type=str, default=None, help="Dataset processing name")
4040
test_parser.add_argument(
4141
"--cache_dir",

0 commit comments

Comments
 (0)