From 993bddd436859e7a51c3919fe414b3c5dd38cd5b Mon Sep 17 00:00:00 2001
From: Lewis Tunstall <lewis.c.tunstall@gmail.com>
Date: Thu, 20 May 2021 15:50:05 +0200
Subject: [PATCH 1/2] Add args description to DatasetInfo

---
 src/datasets/info.py | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/src/datasets/info.py b/src/datasets/info.py
index 8e323bd4c33..a93dbb4bd22 100644
--- a/src/datasets/info.py
+++ b/src/datasets/info.py
@@ -93,23 +93,24 @@ class DatasetInfo:
     Note: Not all fields are known on construction and may be updated later.
 
     Attributes:
-        description (str):
-        citation (str):
-        homepage (str):
-        license (str):
-        features (Features, optional):
+        description (str): A description of the dataset.
+        citation (str): A BibTeX citation of the dataset.
+        homepage (str): A URL to the official homepage for the dataset.
+        license (str): The dataset's license.
+        features (Features, optional): The features used to specify the dataset's columns, types and conversion methods.
         post_processed (PostProcessedInfo, optional):
-        supervised_keys (SupervisedKeysData, optional):
-        builder_name (str, optional)
-        config_name (str, optional)
-        version (str or Version, optional):
-        splits (dict, optional):
-        download_checksums (dict, optional):
-        download_size (int, optional):
+        supervised_keys (SupervisedKeysData, optional): Specifies the input feature and the label for supervised learning if applicable for the dataset.
+        builder_name (str, optional): The name of the :class:`GeneratorBasedBuilder` subclass used to create the dataset. Usually matched to the corresponding script name, but with CamelCase instead of snake_case.
+        config_name (str, optional): The name of the configuration derived from :class:`BuilderConfig`
+        version (str or Version, optional): The version of the dataset.
+        splits (dict, optional): The mapping between split name and metadata.
+        download_checksums (dict, optional): The mapping between the URL to download the dataset's checksums and corresponding metadata.
+        download_size (int, optional): The size of the compressed dataset in bytes.
         post_processing_size (int, optional):
-        dataset_size (int, optional):
-        size_in_bytes (int, optional):
-        task_templates (List[TaskTemplate], optional):
+        dataset_size (int, optional): The combined size of the Apache Arrow tables for all splits in bytes.
+        size_in_bytes (int, optional): The combined size of all files associated with the dataset.
+        task_templates (List[TaskTemplate], optional): The task templates to prepare the dataset for during training and evaluation. Each template casts the dataset's :class:`Features` to standardized column names and types as detailed in :py:mod:`datasets.tasks`.
+        **config_kwargs: Keyword arguments to be passed to the :class:`BuilderConfig` and used in the :class:`DatasetBuilder`.
     """
 
     # Set in the dataset scripts

From c70ab243a1acebe52d33e29a5848671df8dc8106 Mon Sep 17 00:00:00 2001
From: Lewis Tunstall <lewis.c.tunstall@gmail.com>
Date: Fri, 21 May 2021 15:38:23 +0200
Subject: [PATCH 2/2] Add missing attributes descriptions

---
 src/datasets/info.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/datasets/info.py b/src/datasets/info.py
index a93dbb4bd22..6b65f294716 100644
--- a/src/datasets/info.py
+++ b/src/datasets/info.py
@@ -96,19 +96,19 @@ class DatasetInfo:
         description (str): A description of the dataset.
         citation (str): A BibTeX citation of the dataset.
         homepage (str): A URL to the official homepage for the dataset.
-        license (str): The dataset's license.
-        features (Features, optional): The features used to specify the dataset's columns, types and conversion methods.
-        post_processed (PostProcessedInfo, optional):
-        supervised_keys (SupervisedKeysData, optional): Specifies the input feature and the label for supervised learning if applicable for the dataset.
-        builder_name (str, optional): The name of the :class:`GeneratorBasedBuilder` subclass used to create the dataset. Usually matched to the corresponding script name, but with CamelCase instead of snake_case.
+        license (str): The dataset's license. It can be the name of the license or a paragraph containing the terms of the license.
+        features (Features, optional): The features used to specify the dataset's column types.
+        post_processed (PostProcessedInfo, optional): Information regarding the resources of a possible post-processing of the dataset. For example, it can contain information about an index.
+        supervised_keys (SupervisedKeysData, optional): Specifies the input feature and the label for supervised learning if applicable for the dataset (legacy from TFDS).
+        builder_name (str, optional): The name of the :class:`GeneratorBasedBuilder` subclass used to create the dataset. Usually matched to the corresponding script name. It is also the snake_case version of the dataset builder class name.
         config_name (str, optional): The name of the configuration derived from :class:`BuilderConfig`
         version (str or Version, optional): The version of the dataset.
         splits (dict, optional): The mapping between split name and metadata.
         download_checksums (dict, optional): The mapping between the URL to download the dataset's checksums and corresponding metadata.
-        download_size (int, optional): The size of the compressed dataset in bytes.
-        post_processing_size (int, optional):
-        dataset_size (int, optional): The combined size of the Apache Arrow tables for all splits in bytes.
-        size_in_bytes (int, optional): The combined size of all files associated with the dataset.
+        download_size (int, optional): The size of the files to download to generate the dataset, in bytes.
+        post_processing_size (int, optional): The size of the dataset in bytes after post-processing, if any.
+        dataset_size (int, optional): The combined size in bytes of the Arrow tables for all splits.
+        size_in_bytes (int, optional): The combined size in bytes of all files associated with the dataset (downloaded files + Arrow files).
         task_templates (List[TaskTemplate], optional): The task templates to prepare the dataset for during training and evaluation. Each template casts the dataset's :class:`Features` to standardized column names and types as detailed in :py:mod:`datasets.tasks`.
         **config_kwargs: Keyword arguments to be passed to the :class:`BuilderConfig` and used in the :class:`DatasetBuilder`.
     """