diff --git a/src/datasets/arrow_dataset.py b/src/datasets/arrow_dataset.py index 01ee0ef9064..38a61de3934 100644 --- a/src/datasets/arrow_dataset.py +++ b/src/datasets/arrow_dataset.py @@ -1409,11 +1409,15 @@ def cast_to_class_labels(batch): return dset @fingerprint_transform(inplace=False) - def flatten(self, new_fingerprint, max_depth=16) -> "Dataset": + def flatten(self, new_fingerprint: Optional[str] = None, max_depth=16) -> "Dataset": """Flatten the table. Each column with a struct type is flattened into one column per struct field. Other columns are left unchanged. + Args: + new_fingerprint (:obj:`str`, optional): The new fingerprint of the dataset after transform. + If `None`, the new fingerprint is computed using a hash of the previous fingerprint, and the transform arguments. + Returns: :class:`Dataset`: A copy of the dataset with flattened columns. @@ -1524,12 +1528,14 @@ def cast( return dataset @fingerprint_transform(inplace=False) - def cast_column(self, column: str, feature: FeatureType, new_fingerprint: str) -> "Dataset": + def cast_column(self, column: str, feature: FeatureType, new_fingerprint: Optional[str] = None) -> "Dataset": """Cast column to feature for decoding. Args: column (:obj:`str`): Column name. feature (:class:`FeatureType`): Target feature. + new_fingerprint (:obj:`str`, optional): The new fingerprint of the dataset after transform. + If `None`, the new fingerprint is computed using a hash of the previous fingerprint, and the transform arguments. Returns: :class:`Dataset` @@ -1562,7 +1568,7 @@ def cast_column(self, column: str, feature: FeatureType, new_fingerprint: str) - @transmit_tasks @fingerprint_transform(inplace=False) - def remove_columns(self, column_names: Union[str, List[str]], new_fingerprint) -> "Dataset": + def remove_columns(self, column_names: Union[str, List[str]], new_fingerprint: Optional[str] = None) -> "Dataset": """ Remove one or several column(s) in the dataset and the features associated to them. @@ -1610,7 +1616,9 @@ def remove_columns(self, column_names: Union[str, List[str]], new_fingerprint) - @transmit_tasks @fingerprint_transform(inplace=False) - def rename_column(self, original_column_name: str, new_column_name: str, new_fingerprint) -> "Dataset": + def rename_column( + self, original_column_name: str, new_column_name: str, new_fingerprint: Optional[str] = None + ) -> "Dataset": """ Rename a column in the dataset, and move the features associated to the original column under the new column name. @@ -1672,13 +1680,15 @@ def rename(columns): @transmit_tasks @fingerprint_transform(inplace=False) - def rename_columns(self, column_mapping: Dict[str, str], new_fingerprint) -> "Dataset": + def rename_columns(self, column_mapping: Dict[str, str], new_fingerprint: Optional[str] = None) -> "Dataset": """ Rename several columns in the dataset, and move the features associated to the original columns under the new column names. Args: column_mapping (:obj:`Dict[str, str]`): A mapping of columns to rename to their new names + new_fingerprint (:obj:`str`, optional): The new fingerprint of the dataset after transform. + If `None`, the new fingerprint is computed using a hash of the previous fingerprint, and the transform arguments. Returns: :class:`Dataset`: A copy of the dataset with renamed columns