From 271009afa37c25e1d25672ff9ae86a36374b475c Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Tue, 15 Jul 2025 12:12:50 +0200 Subject: [PATCH] fix audio cast storage from array + sampling_rate --- src/datasets/features/audio.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/datasets/features/audio.py b/src/datasets/features/audio.py index 5821ca20fdc..d42fe53f009 100644 --- a/src/datasets/features/audio.py +++ b/src/datasets/features/audio.py @@ -236,7 +236,9 @@ def cast_storage(self, storage: Union[pa.StringArray, pa.StructArray]) -> pa.Str path_array = pa.array([None] * len(storage), type=pa.string()) storage = pa.StructArray.from_arrays([storage, path_array], ["bytes", "path"], mask=storage.is_null()) elif pa.types.is_struct(storage.type) and storage.type.get_all_field_indices("array"): - storage = pa.array([Audio().encode_example(x) if x is not None else None for x in storage.to_pylist()]) + storage = pa.array( + [Audio().encode_example(x) if x is not None else None for x in storage.to_numpy(zero_copy_only=False)] + ) elif pa.types.is_struct(storage.type): if storage.type.get_field_index("bytes") >= 0: bytes_array = storage.field("bytes")