diff --git a/src/datasets/arrow_dataset.py b/src/datasets/arrow_dataset.py index c0e41fbbfb1..369287d2dea 100644 --- a/src/datasets/arrow_dataset.py +++ b/src/datasets/arrow_dataset.py @@ -793,7 +793,7 @@ def class_encode_column(self, column: str) -> "Dataset": dset = dset.map(lambda batch: {column: dst_feat.str2int(batch)}, input_columns=column, batched=True) dset = concatenate_datasets([self.remove_columns([column]), dset], axis=1) - new_features = copy.deepcopy(dset.features) + new_features = dset.features.copy() new_features[column] = dst_feat dset = dset.cast(new_features) @@ -2791,7 +2791,7 @@ def add_column(self, name: str, column: Union[list, np.array], new_fingerprint: # Concatenate tables horizontally table = ConcatenationTable.from_tables([self._data, column_table], axis=1) # Update features - info = copy.deepcopy(self.info) + info = self.info.copy() info.features.update(Features.from_arrow_schema(column_table.schema)) table = update_metadata_with_features(table, info.features) return Dataset(table, info=info, split=self.split, indices_table=self._indices, fingerprint=new_fingerprint) @@ -3017,7 +3017,7 @@ def add_item(self, item: dict, new_fingerprint: str): indices_table = concat_tables([self._indices, item_indices_table]) return Dataset( table, - info=copy.deepcopy(self.info), + info=self.info.copy(), split=self.split, indices_table=indices_table, fingerprint=new_fingerprint, diff --git a/src/datasets/features.py b/src/datasets/features.py index 8e432f2523a..26c77cf56a3 100644 --- a/src/datasets/features.py +++ b/src/datasets/features.py @@ -15,6 +15,7 @@ # Lint as: python3 """ This class handle features definition in datasets and some utilities to display table type.""" +import copy import re from collections.abc import Iterable from dataclasses import dataclass, field, fields @@ -960,5 +961,5 @@ def encode_batch(self, batch): encoded_batch[key] = [encode_nested_example(self[key], obj) for obj in column] return encoded_batch - def copy(self): - return Features(super().copy()) + def copy(self) -> "Features": + return copy.deepcopy(self)