Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 6 additions & 13 deletions .github/ISSUE_TEMPLATE/bug-report.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,9 @@ A clear and concise description of the expected results.
## Actual results
Specify the actual results or traceback.

## Versions
Paste the output of the following code:
```python
import datasets
import sys
import platform

print(f"""
- Datasets: {datasets.__version__}
- Python: {sys.version}
- Platform: {platform.platform()}
""")
```
## Environment info
<!-- You can run the command `datasets-cli env` and copy-and-paste its output below. -->
- `datasets` version:
- Platform:
- Python version:
- PyArrow version:
2 changes: 1 addition & 1 deletion src/datasets/commands/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from argparse import ArgumentParser


class BaseTransformersCLICommand(ABC):
class BaseDatasetsCLICommand(ABC):
@staticmethod
@abstractmethod
def register_subcommand(parser: ArgumentParser):
Expand Down
18 changes: 10 additions & 8 deletions src/datasets/commands/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import shutil
from argparse import ArgumentParser, Namespace

from datasets.commands import BaseTransformersCLICommand
from datasets.commands import BaseDatasetsCLICommand
from datasets.utils.logging import get_logger


Expand Down Expand Up @@ -42,22 +42,24 @@
def convert_command_factory(args: Namespace):
"""
Factory function used to create the command that converts a TensorFlow Datasets dataset to a HuggingFace Datasets dataset.
:return: ServeCommand

Returns: ConvertCommand
"""
return ConvertCommand(args.tfds_path, args.datasets_directory)


class ConvertCommand(BaseTransformersCLICommand):
class ConvertCommand(BaseDatasetsCLICommand):
@staticmethod
def register_subcommand(parser: ArgumentParser):
"""
Register this command to argparse so it's available for the transformer-cli
:param parser: Root parser to register command-specific arguments
:return:
Register this command to argparse so it's available for the datasets-cli

Args:
parser: Root parser to register command-specific arguments
"""
train_parser = parser.add_parser(
"convert",
help="CLI tool to convert a (nlp) TensorFlow-Dataset in a HuggingFace-NLP dataset.",
help="Convert a TensorFlow Datasets dataset to a HuggingFace Datasets dataset.",
)
train_parser.add_argument(
"--tfds_path",
Expand All @@ -66,7 +68,7 @@ def register_subcommand(parser: ArgumentParser):
help="Path to a TensorFlow Datasets folder to convert or a single tfds file to convert.",
)
train_parser.add_argument(
"--datasets_directory", type=str, required=True, help="Path to the HuggingFace NLP folder."
"--datasets_directory", type=str, required=True, help="Path to the HuggingFace Datasets folder."
)
train_parser.set_defaults(func=convert_command_factory)

Expand Down
2 changes: 0 additions & 2 deletions src/datasets/commands/datasets_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from argparse import ArgumentParser

from datasets.commands.convert import ConvertCommand
from datasets.commands.download import DownloadCommand
from datasets.commands.dummy_data import DummyDataCommand
from datasets.commands.env import EnvironmentCommand
from datasets.commands.run_beam import RunBeamCommand
Expand All @@ -17,7 +16,6 @@ def main():

# Register commands
ConvertCommand.register_subcommand(commands_parser)
DownloadCommand.register_subcommand(commands_parser)
EnvironmentCommand.register_subcommand(commands_parser)
TestCommand.register_subcommand(commands_parser)
RunBeamCommand.register_subcommand(commands_parser)
Expand Down
32 changes: 0 additions & 32 deletions src/datasets/commands/download.py

This file was deleted.

6 changes: 3 additions & 3 deletions src/datasets/commands/dummy_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from typing import Optional

from datasets import config
from datasets.commands import BaseTransformersCLICommand
from datasets.commands import BaseDatasetsCLICommand
from datasets.load import import_main_class, prepare_module
from datasets.utils import MockDownloadManager
from datasets.utils.download_manager import DownloadManager
Expand Down Expand Up @@ -212,10 +212,10 @@ def compress_autogenerated_dummy_data(self, path_to_dataset):
shutil.rmtree(base_name)


class DummyDataCommand(BaseTransformersCLICommand):
class DummyDataCommand(BaseDatasetsCLICommand):
@staticmethod
def register_subcommand(parser: ArgumentParser):
test_parser = parser.add_parser("dummy_data")
test_parser = parser.add_parser("dummy_data", help="Generate dummy data.")
test_parser.add_argument("--auto_generate", action="store_true", help="Automatically generate dummy data")
test_parser.add_argument(
"--n_lines", type=int, default=5, help="Number of lines or samples to keep when auto-generating dummy data"
Expand Down
37 changes: 7 additions & 30 deletions src/datasets/commands/env.py
Original file line number Diff line number Diff line change
@@ -1,54 +1,31 @@
import platform
from argparse import ArgumentParser

import pyarrow

from datasets import __version__ as version
from datasets import config
from datasets.commands import BaseTransformersCLICommand
from datasets.commands import BaseDatasetsCLICommand


def info_command_factory(_):
return EnvironmentCommand()


class EnvironmentCommand(BaseTransformersCLICommand):
class EnvironmentCommand(BaseDatasetsCLICommand):
@staticmethod
def register_subcommand(parser: ArgumentParser):
download_parser = parser.add_parser("env")
download_parser = parser.add_parser("env", help="Print relevant system environment info.")
download_parser.set_defaults(func=info_command_factory)

def run(self):
pt_version = "not installed"
pt_cuda_available = "NA"
if config.TORCH_AVAILABLE:
import torch

pt_version = torch.__version__
pt_cuda_available = torch.cuda.is_available()

tf_version = "not installed"
tf_cuda_available = "NA"
if config.TF_AVAILABLE:
import tensorflow as tf

tf_version = tf.__version__
try:
# deprecated in v2.1
tf_cuda_available = tf.test.is_gpu_available()
except AttributeError:
# returns list of devices, convert to bool
tf_cuda_available = bool(tf.config.list_physical_devices("GPU"))

info = {
"`datasets` version": version,
"Platform": platform.platform(),
"Python version": platform.python_version(),
"PyTorch version (GPU?)": "{} ({})".format(pt_version, pt_cuda_available),
"Tensorflow version (GPU?)": "{} ({})".format(tf_version, tf_cuda_available),
"Using GPU in script?": "<fill in>",
"Using distributed or parallel set-up in script?": "<fill in>",
"PyArrow version": pyarrow.__version__,
}

print("\nCopy-and-paste the text below in your GitHub issue and FILL OUT the two last points.\n")
print("\nCopy-and-paste the text below in your GitHub issue.\n")
print(self.format_dict(info))

return info
Expand Down
6 changes: 3 additions & 3 deletions src/datasets/commands/run_beam.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from datasets import config
from datasets.builder import DatasetBuilder
from datasets.commands import BaseTransformersCLICommand
from datasets.commands import BaseDatasetsCLICommand
from datasets.load import import_main_class, prepare_module
from datasets.utils.download_manager import DownloadConfig, GenerateMode

Expand All @@ -25,10 +25,10 @@ def run_beam_command_factory(args):
)


class RunBeamCommand(BaseTransformersCLICommand):
class RunBeamCommand(BaseDatasetsCLICommand):
@staticmethod
def register_subcommand(parser: ArgumentParser):
run_beam_parser = parser.add_parser("run_beam")
run_beam_parser = parser.add_parser("run_beam", help="Run a Beam dataset processing pipeline.")
run_beam_parser.add_argument("--name", type=str, default=None, help="Dataset processing name")
run_beam_parser.add_argument(
"--cache_dir",
Expand Down
6 changes: 3 additions & 3 deletions src/datasets/commands/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import datasets.config
from datasets.builder import DatasetBuilder
from datasets.commands import BaseTransformersCLICommand
from datasets.commands import BaseDatasetsCLICommand
from datasets.load import import_main_class, prepare_module
from datasets.utils.download_manager import GenerateMode
from datasets.utils.filelock import logger as fl_logger
Expand All @@ -32,10 +32,10 @@ def test_command_factory(args):
)


class TestCommand(BaseTransformersCLICommand):
class TestCommand(BaseDatasetsCLICommand):
@staticmethod
def register_subcommand(parser: ArgumentParser):
test_parser = parser.add_parser("test")
test_parser = parser.add_parser("test", help="Test dataset implementation.")
test_parser.add_argument("--name", type=str, default=None, help="Dataset processing name")
test_parser.add_argument(
"--cache_dir",
Expand Down