From 3d0a1404f435bc0a3de51323c1b1931af6026273 Mon Sep 17 00:00:00 2001
From: Quentin Lhoest
Date: Wed, 2 Jul 2025 17:07:36 +0200
Subject: [PATCH] fix infer list of images

---
 src/datasets/arrow_writer.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/datasets/arrow_writer.py b/src/datasets/arrow_writer.py
index 64097b773f1..72fba8d54a7 100644
--- a/src/datasets/arrow_writer.py
+++ b/src/datasets/arrow_writer.py
@@ -28,6 +28,7 @@
 from .features import Audio, Features, Image, Pdf, Value, Video
 from .features.features import (
     FeatureType,
+    List,
     _ArrayXDExtensionType,
     _visit,
     cast_to_python_objects,
@@ -193,9 +194,9 @@ def _infer_custom_type_and_encode(data: Iterable) -> tuple[Iterable, Optional[Fe
             if isinstance(non_null_value, PIL.Image.Image):
                 return [Image().encode_example(value) if value is not None else None for value in data], Image()
             if isinstance(non_null_value, list) and isinstance(non_null_value[0], PIL.Image.Image):
-                return [[Image().encode_example(x) for x in value] if value is not None else None for value in data], [
-                    Image()
-                ]
+                return [
+                    [Image().encode_example(x) for x in value] if value is not None else None for value in data
+                ], List(Image())
         if config.PDFPLUMBER_AVAILABLE and "pdfplumber" in sys.modules:
             import pdfplumber
 
@@ -203,9 +204,9 @@ def _infer_custom_type_and_encode(data: Iterable) -> tuple[Iterable, Optional[Fe
             if isinstance(non_null_value, pdfplumber.pdf.PDF):
                 return [Pdf().encode_example(value) if value is not None else None for value in data], Pdf()
             if isinstance(non_null_value, list) and isinstance(non_null_value[0], pdfplumber.pdf.PDF):
-                return [[Pdf().encode_example(x) for x in value] if value is not None else None for value in data], [
-                    Pdf()
-                ]
+                return [
+                    [Pdf().encode_example(x) for x in value] if value is not None else None for value in data
+                ], List(Pdf())
         return data, None
 
     def __arrow_array__(self, type: Optional[pa.DataType] = None):