Skip to content

Commit ad3467e

Browse files
authored
remove_columns/rename_columns doc fixes (#6772)
* `remove_columns`/`rename_columns` doc fixes
* Nit
1 parent 7599f15 commit ad3467e

File tree

2 files changed

+18
-18
lines changed

2 files changed

+18
-18
lines changed

src/datasets/arrow_dataset.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2173,7 +2173,7 @@ def remove_columns(self, column_names: Union[str, List[str]], new_fingerprint: O
21732173
Remove one or several column(s) in the dataset and the features associated to them.
21742174
21752175
You can also remove a column using [`~datasets.Dataset.map`] with `remove_columns` but the present method
2176-
is in-place (doesn't copy the data to a new dataset) and is thus faster.
2176+
doesn't copy the data of the remaining columns and is thus faster.
21772177
21782178
Args:
21792179
column_names (`Union[str, List[str]]`):
@@ -2190,12 +2190,12 @@ def remove_columns(self, column_names: Union[str, List[str]], new_fingerprint: O
21902190
```py
21912191
>>> from datasets import load_dataset
21922192
>>> ds = load_dataset("rotten_tomatoes", split="validation")
2193-
>>> ds.remove_columns('label')
2193+
>>> ds = ds.remove_columns('label')
21942194
Dataset({
21952195
features: ['text'],
21962196
num_rows: 1066
21972197
})
2198-
>>> ds.remove_columns(column_names=ds.column_names) # Removing all the columns returns an empty dataset with the `num_rows` property set to 0
2198+
>>> ds = ds.remove_columns(column_names=ds.column_names) # Removing all the columns returns an empty dataset with the `num_rows` property set to 0
21992199
Dataset({
22002200
features: [],
22012201
num_rows: 0
@@ -2247,7 +2247,7 @@ def rename_column(
22472247
```py
22482248
>>> from datasets import load_dataset
22492249
>>> ds = load_dataset("rotten_tomatoes", split="validation")
2250-
>>> ds.rename_column('label', 'label_new')
2250+
>>> ds = ds.rename_column('label', 'label_new')
22512251
Dataset({
22522252
features: ['text', 'label_new'],
22532253
num_rows: 1066
@@ -2310,7 +2310,7 @@ def rename_columns(self, column_mapping: Dict[str, str], new_fingerprint: Option
23102310
```py
23112311
>>> from datasets import load_dataset
23122312
>>> ds = load_dataset("rotten_tomatoes", split="validation")
2313-
>>> ds.rename_columns({'text': 'text_new', 'label': 'label_new'})
2313+
>>> ds = ds.rename_columns({'text': 'text_new', 'label': 'label_new'})
23142314
Dataset({
23152315
features: ['text_new', 'label_new'],
23162316
num_rows: 1066
@@ -2425,7 +2425,7 @@ def __len__(self):
24252425
def __iter__(self):
24262426
"""Iterate through the examples.
24272427
2428-
If a formatting is set with :meth:`Dataset.set_format` rows will be returned with the
2428+
If a formatting is set with [`Dataset.set_format`] rows will be returned with the
24292429
selected format.
24302430
"""
24312431
if self._indices is None:
@@ -5960,7 +5960,7 @@ def add_elasticsearch_index(
59605960
The column of the documents to add to the index.
59615961
index_name (`str`, *optional*):
59625962
The `index_name`/identifier of the index.
5963-
This is the index name that is used to call [`~Dataset.get_nearest_examples`] or [`Dataset.search`].
5963+
This is the index name that is used to call [`~Dataset.get_nearest_examples`] or [`~Dataset.search`].
59645964
By default it corresponds to `column`.
59655965
host (`str`, *optional*, defaults to `localhost`):
59665966
Host of where ElasticSearch is running.

src/datasets/dataset_dict.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ def num_columns(self) -> Dict[str, int]:
135135

136136
@property
137137
def num_rows(self) -> Dict[str, int]:
138-
"""Number of rows in each split of the dataset (same as :func:`datasets.Dataset.__len__`).
138+
"""Number of rows in each split of the dataset.
139139
140140
Example:
141141
@@ -222,7 +222,7 @@ def unique(self, column: str) -> Dict[str, List]:
222222
223223
Args:
224224
column (`str`):
225-
column name (list all the column names with [`~datasets.Dataset.column_names`])
225+
column name (list all the column names with [`~datasets.DatasetDict.column_names`])
226226
227227
Returns:
228228
Dict[`str`, `list`]: Dictionary of unique elements in the given column.
@@ -268,15 +268,12 @@ def cast(self, features: Features) -> "DatasetDict":
268268
Cast the dataset to a new set of features.
269269
The transformation is applied to all the datasets of the dataset dictionary.
270270
271-
You can also remove a column using [`Dataset.map`] with `feature` but `cast`
272-
is in-place (doesn't copy the data to a new dataset) and is thus faster.
273-
274271
Args:
275272
features ([`Features`]):
276273
New features to cast the dataset to.
277274
The name and order of the fields in the features must match the current column names.
278275
The type of the data must also be convertible from one type to the other.
279-
For non-trivial conversion, e.g. `string` <-> `ClassLabel` you should use [`~Dataset.map`] to update the Dataset.
276+
For non-trivial conversion, e.g. `string` <-> `ClassLabel` you should use [`~DatasetDict.map`] to update the dataset.
280277
281278
Example:
282279
@@ -334,19 +331,22 @@ def remove_columns(self, column_names: Union[str, List[str]]) -> "DatasetDict":
334331
335332
The transformation is applied to all the splits of the dataset dictionary.
336333
337-
You can also remove a column using [`Dataset.map`] with `remove_columns` but the present method
338-
is in-place (doesn't copy the data to a new dataset) and is thus faster.
334+
You can also remove a column using [`~DatasetDict.map`] with `remove_columns` but the present method
335+
doesn't copy the data of the remaining columns and is thus faster.
339336
340337
Args:
341338
column_names (`Union[str, List[str]]`):
342339
Name of the column(s) to remove.
343340
341+
Returns:
342+
[`DatasetDict`]: A copy of the dataset object without the columns to remove.
343+
344344
Example:
345345
346346
```py
347347
>>> from datasets import load_dataset
348348
>>> ds = load_dataset("rotten_tomatoes")
349-
>>> ds.remove_columns("label")
349+
>>> ds = ds.remove_columns("label")
350350
DatasetDict({
351351
train: Dataset({
352352
features: ['text'],
@@ -371,7 +371,7 @@ def rename_column(self, original_column_name: str, new_column_name: str) -> "Dat
371371
Rename a column in the dataset and move the features associated to the original column under the new column name.
372372
The transformation is applied to all the datasets of the dataset dictionary.
373373
374-
You can also rename a column using [`~Dataset.map`] with `remove_columns` but the present method:
374+
You can also rename a column using [`~DatasetDict.map`] with `remove_columns` but the present method:
375375
- takes care of moving the original features under the new column name.
376376
- doesn't copy the data to a new dataset and is thus much faster.
377377
@@ -386,7 +386,7 @@ def rename_column(self, original_column_name: str, new_column_name: str) -> "Dat
386386
```py
387387
>>> from datasets import load_dataset
388388
>>> ds = load_dataset("rotten_tomatoes")
389-
>>> ds.rename_column("label", "label_new")
389+
>>> ds = ds.rename_column("label", "label_new")
390390
DatasetDict({
391391
train: Dataset({
392392
features: ['text', 'label_new'],

0 commit comments

Comments
 (0)