Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions src/datasets/arrow_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from .features import Audio, Features, Image, Pdf, Value, Video
from .features.features import (
FeatureType,
List,
_ArrayXDExtensionType,
_visit,
cast_to_python_objects,
Expand Down Expand Up @@ -193,19 +194,19 @@ def _infer_custom_type_and_encode(data: Iterable) -> tuple[Iterable, Optional[Fe
if isinstance(non_null_value, PIL.Image.Image):
return [Image().encode_example(value) if value is not None else None for value in data], Image()
if isinstance(non_null_value, list) and isinstance(non_null_value[0], PIL.Image.Image):
return [[Image().encode_example(x) for x in value] if value is not None else None for value in data], [
Image()
]
return [
[Image().encode_example(x) for x in value] if value is not None else None for value in data
], List(Image())
if config.PDFPLUMBER_AVAILABLE and "pdfplumber" in sys.modules:
import pdfplumber

non_null_idx, non_null_value = first_non_null_non_empty_value(data)
if isinstance(non_null_value, pdfplumber.pdf.PDF):
return [Pdf().encode_example(value) if value is not None else None for value in data], Pdf()
if isinstance(non_null_value, list) and isinstance(non_null_value[0], pdfplumber.pdf.PDF):
return [[Pdf().encode_example(x) for x in value] if value is not None else None for value in data], [
Pdf()
]
return [
[Pdf().encode_example(x) for x in value] if value is not None else None for value in data
], List(Pdf())
return data, None

def __arrow_array__(self, type: Optional[pa.DataType] = None):
Expand Down
Loading