remove_columns/rename_columns doc fixes (#6772)

mariosasko · web-flow · commit ad3467e9b138 · 2024-04-02T18:17:46.000+02:00
* `remove_columns`/`rename_columns` doc fixes

* Nit
diff --git a/src/datasets/arrow_dataset.py b/src/datasets/arrow_dataset.py
@@ -2173,7 +2173,7 @@ def remove_columns(self, column_names: Union[str, List[str]], new_fingerprint: O
         Remove one or several column(s) in the dataset and the features associated to them.
 
         You can also remove a column using [`~datasets.Dataset.map`] with `remove_columns` but the present method
-        is in-place (doesn't copy the data to a new dataset) and is thus faster.
+        doesn't copy the data of the remaining columns and is thus faster.
 
         Args:
             column_names (`Union[str, List[str]]`):
@@ -2190,12 +2190,12 @@ def remove_columns(self, column_names: Union[str, List[str]], new_fingerprint: O
         ```py
         >>> from datasets import load_dataset
         >>> ds = load_dataset("rotten_tomatoes", split="validation")
-        >>> ds.remove_columns('label')
+        >>> ds = ds.remove_columns('label')
         Dataset({
             features: ['text'],
             num_rows: 1066
         })
-        >>> ds.remove_columns(column_names=ds.column_names) # Removing all the columns returns an empty dataset with the `num_rows` property set to 0
+        >>> ds = ds.remove_columns(column_names=ds.column_names) # Removing all the columns returns an empty dataset with the `num_rows` property set to 0
         Dataset({
             features: [],
             num_rows: 0
@@ -2247,7 +2247,7 @@ def rename_column(
         ```py
         >>> from datasets import load_dataset
         >>> ds = load_dataset("rotten_tomatoes", split="validation")
-        >>> ds.rename_column('label', 'label_new')
+        >>> ds = ds.rename_column('label', 'label_new')
         Dataset({
             features: ['text', 'label_new'],
             num_rows: 1066
@@ -2310,7 +2310,7 @@ def rename_columns(self, column_mapping: Dict[str, str], new_fingerprint: Option
         ```py
         >>> from datasets import load_dataset
         >>> ds = load_dataset("rotten_tomatoes", split="validation")
-        >>> ds.rename_columns({'text': 'text_new', 'label': 'label_new'})
+        >>> ds = ds.rename_columns({'text': 'text_new', 'label': 'label_new'})
         Dataset({
             features: ['text_new', 'label_new'],
             num_rows: 1066
@@ -2425,7 +2425,7 @@ def __len__(self):
     def __iter__(self):
         """Iterate through the examples.
 
-        If a formatting is set with :meth:`Dataset.set_format` rows will be returned with the
+        If a formatting is set with [`Dataset.set_format`], rows will be returned with the
         selected format.
         """
         if self._indices is None:
@@ -5960,7 +5960,7 @@ def add_elasticsearch_index(
                 The column of the documents to add to the index.
             index_name (`str`, *optional*):
                 The `index_name`/identifier of the index.
-                This is the index name that is used to call [`~Dataset.get_nearest_examples`] or [`Dataset.search`].
+                This is the index name that is used to call [`~Dataset.get_nearest_examples`] or [`~Dataset.search`].
                 By default it corresponds to `column`.
             host (`str`, *optional*, defaults to `localhost`):
                 Host of where ElasticSearch is running.
diff --git a/src/datasets/dataset_dict.py b/src/datasets/dataset_dict.py
@@ -135,7 +135,7 @@ def num_columns(self) -> Dict[str, int]:
 
     @property
     def num_rows(self) -> Dict[str, int]:
-        """Number of rows in each split of the dataset (same as :func:`datasets.Dataset.__len__`).
+        """Number of rows in each split of the dataset.
 
         Example:
 
@@ -222,7 +222,7 @@ def unique(self, column: str) -> Dict[str, List]:
 
         Args:
             column (`str`):
-                column name (list all the column names with [`~datasets.Dataset.column_names`])
+                column name (list all the column names with [`~datasets.DatasetDict.column_names`])
 
         Returns:
             Dict[`str`, `list`]: Dictionary of unique elements in the given column.
@@ -268,15 +268,12 @@ def cast(self, features: Features) -> "DatasetDict":
         Cast the dataset to a new set of features.
         The transformation is applied to all the datasets of the dataset dictionary.
 
-        You can also remove a column using [`Dataset.map`] with `feature` but `cast`
-        is in-place (doesn't copy the data to a new dataset) and is thus faster.
-
         Args:
             features ([`Features`]):
                 New features to cast the dataset to.
                 The name and order of the fields in the features must match the current column names.
                 The type of the data must also be convertible from one type to the other.
-                For non-trivial conversion, e.g. `string` <-> `ClassLabel` you should use [`~Dataset.map`] to update the Dataset.
+                For non-trivial conversion, e.g. `string` <-> `ClassLabel`, you should use [`~DatasetDict.map`] to update the dataset.
 
         Example:
 
@@ -334,19 +331,22 @@ def remove_columns(self, column_names: Union[str, List[str]]) -> "DatasetDict":
 
         The transformation is applied to all the splits of the dataset dictionary.
 
-        You can also remove a column using [`Dataset.map`] with `remove_columns` but the present method
-        is in-place (doesn't copy the data to a new dataset) and is thus faster.
+        You can also remove a column using [`~DatasetDict.map`] with `remove_columns` but the present method
+        doesn't copy the data of the remaining columns and is thus faster.
 
         Args:
             column_names (`Union[str, List[str]]`):
                 Name of the column(s) to remove.
 
+        Returns:
+            [`DatasetDict`]: A copy of the dataset dictionary without the removed columns.
+
         Example:
 
         ```py
         >>> from datasets import load_dataset
         >>> ds = load_dataset("rotten_tomatoes")
-        >>> ds.remove_columns("label")
+        >>> ds = ds.remove_columns("label")
         DatasetDict({
             train: Dataset({
                 features: ['text'],
@@ -371,7 +371,7 @@ def rename_column(self, original_column_name: str, new_column_name: str) -> "Dat
         Rename a column in the dataset and move the features associated to the original column under the new column name.
         The transformation is applied to all the datasets of the dataset dictionary.
 
-        You can also rename a column using [`~Dataset.map`] with `remove_columns` but the present method:
+        You can also rename a column using [`~DatasetDict.map`] with `remove_columns` but the present method:
             - takes care of moving the original features under the new column name.
             - doesn't copy the data to a new dataset and is thus much faster.
 
@@ -386,7 +386,7 @@ def rename_column(self, original_column_name: str, new_column_name: str) -> "Dat
         ```py
         >>> from datasets import load_dataset
         >>> ds = load_dataset("rotten_tomatoes")
-        >>> ds.rename_column("label", "label_new")
+        >>> ds = ds.rename_column("label", "label_new")
         DatasetDict({
             train: Dataset({
                 features: ['text', 'label_new'],