Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions src/datasets/features/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,19 @@ class Audio:
channels.
decode (:obj:`bool`, default ``True``): Whether to decode the audio data. If `False`,
returns the underlying dictionary in the format {"path": audio_path, "bytes": audio_bytes}.

Example:

```py
>>> from datasets import load_dataset, Audio
>>> ds = load_dataset("PolyAI/minds14", name="en-US", split="train")
>>> ds = ds.cast_column("audio", Audio(sampling_rate=16000))
>>> ds[0]["audio"]
{'array': array([ 2.3443763e-05, 2.1729663e-04, 2.2145823e-04, ...,
3.8356509e-05, -7.3497440e-06, -2.1754686e-05], dtype=float32),
'path': '/root/.cache/huggingface/datasets/downloads/extracted/f14948e0e84be638dd7943ac36518a4cf3324e8b7aa331c5ab11541518e9368c/en-US~JOINT_ACCOUNT/602ba55abb1e6d0fbce92065.wav',
'sampling_rate': 16000}
```
"""

sampling_rate: Optional[int] = None
Expand Down
136 changes: 134 additions & 2 deletions src/datasets/features/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,15 @@ class Value:
large_binary
string
large_string

Example:

```py
>>> from datasets import Features
>>> features = Features({'stars': Value(dtype='int32')})
>>> features
{'stars': Value(dtype='int32', id=None)}
```
"""

dtype: str
Expand Down Expand Up @@ -476,6 +485,20 @@ def encode_example(self, value):

@dataclass
class Array2D(_ArrayXD):
"""Create a two-dimensional array.

Args:
shape (`tuple`): The size of each dimension.
dtype (`str`): The value of the data type.

Example:

```py
>>> from datasets import Features
>>> features = Features({'x': Array2D(shape=(1, 3), dtype='int32')})
```
"""

shape: tuple
dtype: str
id: Optional[str] = None
Expand All @@ -485,6 +508,20 @@ class Array2D(_ArrayXD):

@dataclass
class Array3D(_ArrayXD):
"""Create a three-dimensional array.

Args:
shape (`tuple`): The size of each dimension.
dtype (`str`): The value of the data type.

Example:

```py
>>> from datasets import Features
>>> features = Features({'x': Array3D(shape=(1, 2, 3), dtype='int32')})
```
"""

shape: tuple
dtype: str
id: Optional[str] = None
Expand All @@ -494,6 +531,20 @@ class Array3D(_ArrayXD):

@dataclass
class Array4D(_ArrayXD):
"""Create a four-dimensional array.

Args:
shape (`tuple`): The size of each dimension.
dtype (`str`): The value of the data type.

Example:

```py
>>> from datasets import Features
>>> features = Features({'x': Array4D(shape=(1, 2, 2, 3), dtype='int32')})
```
"""

shape: tuple
dtype: str
id: Optional[str] = None
Expand All @@ -503,6 +554,20 @@ class Array4D(_ArrayXD):

@dataclass
class Array5D(_ArrayXD):
"""Create a five-dimensional array.

Args:
shape (`tuple`): The size of each dimension.
dtype (`str`): The value of the data type.

Example:

```py
>>> from datasets import Features
>>> features = Features({'x': Array5D(shape=(1, 2, 2, 3, 3), dtype='int32')})
```
"""

shape: tuple
dtype: str
id: Optional[str] = None
Expand Down Expand Up @@ -792,6 +857,15 @@ class ClassLabel:
names (:obj:`list` of :obj:`str`, optional): String names for the integer classes.
The order in which the names are provided is kept.
names_file (:obj:`str`, optional): Path to a file with names for the integer classes, one per line.

Example:

```py
>>> from datasets import Features
>>> features = Features({'label': ClassLabel(num_classes=3, names=['bad', 'ok', 'good'])})
>>> features
{'label': ClassLabel(num_classes=3, names=['bad', 'ok', 'good'], id=None)}
```
"""

num_classes: int = None
Expand Down Expand Up @@ -835,7 +909,17 @@ def __call__(self):
return self.pa_type

def str2int(self, values: Union[str, Iterable]):
"""Conversion class name string => integer."""
"""Conversion class name string => integer.

Example:

```py
>>> from datasets import load_dataset
>>> ds = load_dataset("rotten_tomatoes", split="train")
>>> ds.features["label"].str2int('neg')
0
```
"""
if not isinstance(values, str) and not isinstance(values, Iterable):
raise ValueError(
f"Values {values} should be a string or an Iterable (list, numpy array, pytorch, tensorflow tensors)"
Expand Down Expand Up @@ -864,7 +948,17 @@ def str2int(self, values: Union[str, Iterable]):
return output if return_list else output[0]

def int2str(self, values: Union[int, Iterable]):
"""Conversion integer => class name string."""
"""Conversion integer => class name string.

Example:

```py
>>> from datasets import load_dataset
>>> ds = load_dataset("rotten_tomatoes", split="train")
>>> ds.features["label"].int2str(0)
'neg'
```
"""
if not isinstance(values, int) and not isinstance(values, Iterable):
raise ValueError(
f"Values {values} should be an integer or an Iterable (list, numpy array, pytorch, tensorflow tensors)"
Expand Down Expand Up @@ -911,6 +1005,19 @@ def _load_names_from_file(names_filepath):
class Sequence:
"""Construct a list of feature from a single type or a dict of types.
Mostly here for compatiblity with tfds.

Args:
feature: A list of features of a single type or a dictionary of types.
length (`int`): Length of the sequence.

Example:

```py
>>> from datasets import Features, Sequence, Value, ClassLabel
>>> features = Features({'post': Sequence(feature={'text': Value(dtype='string'), 'upvotes': Value(dtype='int32'), 'label': ClassLabel(num_classes=2, names=['hot', 'cold'])})})
>>> features
{'post': Sequence(feature={'text': Value(dtype='string', id=None), 'upvotes': Value(dtype='int32', id=None), 'label': ClassLabel(num_classes=2, names=['hot', 'cold'], id=None)}, length=-1, id=None)}
```
"""

feature: Any
Expand Down Expand Up @@ -1453,6 +1560,17 @@ def copy(self) -> "Features":

Returns:
:class:`Features`

Example:

```py
>>> from datasets import load_dataset
>>> ds = load_dataset("rotten_tomatoes", split="train")
>>> copy_of_features = ds.features.copy()
>>> copy_of_features
{'label': ClassLabel(num_classes=2, names=['neg', 'pos'], id=None),
'text': Value(dtype='string', id=None)}
```
"""
return copy.deepcopy(self)

Expand Down Expand Up @@ -1527,6 +1645,20 @@ def flatten(self, max_depth=16) -> "Features":

Returns:
Features: the flattened features

Example:

```py
>>> from datasets import load_dataset
>>> ds = load_dataset("squad", split="train")
>>> ds.features.flatten()
{'answers.answer_start': Sequence(feature=Value(dtype='int32', id=None), length=-1, id=None),
'answers.text': Sequence(feature=Value(dtype='string', id=None), length=-1, id=None),
'context': Value(dtype='string', id=None),
'id': Value(dtype='string', id=None),
'question': Value(dtype='string', id=None),
'title': Value(dtype='string', id=None)}
```
"""
for depth in range(1, max_depth):
no_change = True
Expand Down
10 changes: 10 additions & 0 deletions src/datasets/features/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,16 @@ class Image:
Args:
decode (:obj:`bool`, default ``True``): Whether to decode the image data. If `False`,
returns the underlying dictionary in the format {"path": image_path, "bytes": image_bytes}.

Examples:

```py
>>> from datasets import load_dataset, Image
>>> ds = load_dataset("beans", split="train")
>>> ds = ds.cast_column('image', Image(decode=False))
>>> ds[0]["image"]
{'bytes': None,
'path': '/root/.cache/huggingface/datasets/downloads/extracted/b0a21163f78769a2cf11f58dfc767fb458fc7cea5c05dccc0144a2c0f0bc1292/train/healthy/healthy_train.85.jpg'}
```
"""

decode: bool = True
Expand Down
15 changes: 15 additions & 0 deletions src/datasets/info.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,14 @@ def write_to_directory(self, metric_info_dir, pretty_print=False):
"""Write `MetricInfo` as JSON to `metric_info_dir`.
Also save the license separately in LICENCE.
If `pretty_print` is True, the JSON will be pretty-printed with the indent level of 4.

Example:

```py
>>> from datasets import load_metric
>>> metric = load_metric("accuracy")
>>> metric.info.write_to_directory("/path/to/directory/")
```
"""
with open(os.path.join(metric_info_dir, config.METRIC_INFO_FILENAME), "w", encoding="utf-8") as f:
json.dump(asdict(self), f, indent=4 if pretty_print else None)
Expand All @@ -368,6 +376,13 @@ def from_directory(cls, metric_info_dir) -> "MetricInfo":
Args:
metric_info_dir: `str` The directory containing the metadata file. This
should be the root directory of a specific dataset version.

Example:

```py
>>> from datasets import MetricInfo
>>> metric_info = MetricInfo.from_directory("/path/to/directory/")
```
"""
logger.info(f"Loading Metric info from {metric_info_dir}")
if not metric_info_dir:
Expand Down
24 changes: 24 additions & 0 deletions src/datasets/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,14 @@ def compute(self, *, predictions=None, references=None, **kwargs) -> Optional[di

- Dictionary with the metrics if this metric is run on the main process (``process_id == 0``).
- None if the metric is not run on the main process (``process_id != 0``).

Example:

```py
>>> from datasets import load_metric
>>> metric = load_metric("accuracy")
>>> accuracy = metric.compute(predictions=model_prediction, references=labels)
```
"""
all_kwargs = {"predictions": predictions, "references": references, **kwargs}
if predictions is None and references is None:
Expand Down Expand Up @@ -454,6 +462,14 @@ def add_batch(self, *, predictions=None, references=None, **kwargs):
Args:
predictions (list/array/tensor, optional): Predictions.
references (list/array/tensor, optional): References.

Example:

```py
>>> from datasets import load_metric
>>> metric = load_metric("accuracy")
>>> metric.add_batch(predictions=model_prediction, references=labels)
```
"""
bad_inputs = [input_name for input_name in kwargs if input_name not in self.features]
if bad_inputs:
Expand Down Expand Up @@ -493,6 +509,14 @@ def add(self, *, prediction=None, reference=None, **kwargs):
Args:
prediction (list/array/tensor, optional): Predictions.
reference (list/array/tensor, optional): References.

Example:

```py
>>> from datasets import load_metric
>>> metric = load_metric("accuracy")
>>> metric.add(prediction=model_prediction, reference=label)
```
"""
bad_inputs = [input_name for input_name in kwargs if input_name not in self.features]
if bad_inputs:
Expand Down