From 6335236e75a34c408193c66e942fdc2cd5590776 Mon Sep 17 00:00:00 2001 From: Felix Marty Date: Thu, 12 May 2022 11:30:05 +0200 Subject: [PATCH 1/3] added hint --- src/datasets/arrow_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/datasets/arrow_dataset.py b/src/datasets/arrow_dataset.py index 01ee0ef9064..0acf64e29cd 100644 --- a/src/datasets/arrow_dataset.py +++ b/src/datasets/arrow_dataset.py @@ -1562,7 +1562,7 @@ def cast_column(self, column: str, feature: FeatureType, new_fingerprint: str) - @transmit_tasks @fingerprint_transform(inplace=False) - def remove_columns(self, column_names: Union[str, List[str]], new_fingerprint) -> "Dataset": + def remove_columns(self, column_names: Union[str, List[str]], new_fingerprint: Optional[str] = None) -> "Dataset": """ Remove one or several column(s) in the dataset and the features associated to them. From 9a8dc3ee478b16ddbbdf49ca942ad858d30c4282 Mon Sep 17 00:00:00 2001 From: Felix Marty Date: Thu, 12 May 2022 12:49:33 +0200 Subject: [PATCH 2/3] added type hints --- src/datasets/arrow_dataset.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/datasets/arrow_dataset.py b/src/datasets/arrow_dataset.py index 0acf64e29cd..c8920761e15 100644 --- a/src/datasets/arrow_dataset.py +++ b/src/datasets/arrow_dataset.py @@ -1409,11 +1409,15 @@ def cast_to_class_labels(batch): return dset @fingerprint_transform(inplace=False) - def flatten(self, new_fingerprint, max_depth=16) -> "Dataset": + def flatten(self, new_fingerprint: Optional[str] = None, max_depth=16) -> "Dataset": """Flatten the table. Each column with a struct type is flattened into one column per struct field. Other columns are left unchanged. + Args: + new_fingerprint (:obj:`str`, optional): The new fingerprint of the dataset after transform. + If `None`, the new fingerprint is computed using a hash of the previous fingerprint, and the transform arguments. + Returns: :class:`Dataset`: A copy of the dataset with flattened columns. @@ -1524,12 +1528,14 @@ def cast( return dataset @fingerprint_transform(inplace=False) - def cast_column(self, column: str, feature: FeatureType, new_fingerprint: str) -> "Dataset": + def cast_column(self, column: str, feature: FeatureType, new_fingerprint: Optional[str] = None) -> "Dataset": """Cast column to feature for decoding. Args: column (:obj:`str`): Column name. feature (:class:`FeatureType`): Target feature. + new_fingerprint (:obj:`str`, optional): The new fingerprint of the dataset after transform. + If `None`, the new fingerprint is computed using a hash of the previous fingerprint, and the transform arguments. Returns: :class:`Dataset` @@ -1610,7 +1616,7 @@ def remove_columns(self, column_names: Union[str, List[str]], new_fingerprint: O @transmit_tasks @fingerprint_transform(inplace=False) - def rename_column(self, original_column_name: str, new_column_name: str, new_fingerprint) -> "Dataset": + def rename_column(self, original_column_name: str, new_column_name: str, new_fingerprint: Optional[str] = None) -> "Dataset": """ Rename a column in the dataset, and move the features associated to the original column under the new column name. @@ -1672,13 +1678,15 @@ def rename(columns): @transmit_tasks @fingerprint_transform(inplace=False) - def rename_columns(self, column_mapping: Dict[str, str], new_fingerprint) -> "Dataset": + def rename_columns(self, column_mapping: Dict[str, str], new_fingerprint: Optional[str] = None) -> "Dataset": """ Rename several columns in the dataset, and move the features associated to the original columns under the new column names. Args: column_mapping (:obj:`Dict[str, str]`): A mapping of columns to rename to their new names + new_fingerprint (:obj:`str`, optional): The new fingerprint of the dataset after transform. + If `None`, the new fingerprint is computed using a hash of the previous fingerprint, and the transform arguments. Returns: :class:`Dataset`: A copy of the dataset with renamed columns From b2759c6d2fbe116bfd73620af36b50d242577bb4 Mon Sep 17 00:00:00 2001 From: Felix Marty Date: Thu, 12 May 2022 13:07:16 +0200 Subject: [PATCH 3/3] quality --- src/datasets/arrow_dataset.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/datasets/arrow_dataset.py b/src/datasets/arrow_dataset.py index c8920761e15..38a61de3934 100644 --- a/src/datasets/arrow_dataset.py +++ b/src/datasets/arrow_dataset.py @@ -1616,7 +1616,9 @@ def remove_columns(self, column_names: Union[str, List[str]], new_fingerprint: O @transmit_tasks @fingerprint_transform(inplace=False) - def rename_column(self, original_column_name: str, new_column_name: str, new_fingerprint: Optional[str] = None) -> "Dataset": + def rename_column( + self, original_column_name: str, new_column_name: str, new_fingerprint: Optional[str] = None + ) -> "Dataset": """ Rename a column in the dataset, and move the features associated to the original column under the new column name.