diff --git a/src/datasets/arrow_writer.py b/src/datasets/arrow_writer.py index d33fea4042d..761639ff4ad 100644 --- a/src/datasets/arrow_writer.py +++ b/src/datasets/arrow_writer.py @@ -372,10 +372,14 @@ def write_batch( writer_batch_size: Optional[int] = None, ): """Write a batch of Example to file. + Ignores the batch if it appears to be empty, + preventing a potential schema update of unknown types. Args: - example: the Example to add. + batch_examples: the batch of examples to add. """ + if batch_examples and len(next(iter(batch_examples.values()))) == 0: + return schema = None if self.pa_writer is None and self.update_features else self._schema try_schema = self._schema if self.pa_writer is None and self.update_features else None typed_sequence_examples = {}