Skip to content

Commit f234fce

Browse files
authored
Allow null values in dict columns (#6743)
1 parent 5fa934e commit f234fce

File tree

2 files changed

+12
-2
lines changed

2 files changed

+12
-2
lines changed

src/datasets/features/features.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1937,7 +1937,7 @@ def encode_column(self, column, column_name: str):
19371937
`list[Any]`
19381938
"""
19391939
column = cast_to_python_objects(column)
1940-
return [encode_nested_example(self[column_name], obj) for obj in column]
1940+
return [encode_nested_example(self[column_name], obj, level=1) for obj in column]
19411941

19421942
def encode_batch(self, batch):
19431943
"""
@@ -1955,7 +1955,7 @@ def encode_batch(self, batch):
19551955
raise ValueError(f"Column mismatch between batch {set(batch)} and features {set(self)}")
19561956
for key, column in batch.items():
19571957
column = cast_to_python_objects(column)
1958-
encoded_batch[key] = [encode_nested_example(self[key], obj) for obj in column]
1958+
encoded_batch[key] = [encode_nested_example(self[key], obj, level=1) for obj in column]
19591959
return encoded_batch
19601960

19611961
def decode_example(self, example: dict, token_per_repo_id: Optional[Dict[str, Union[str, bool, None]]] = None):

tests/features/test_features.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -405,6 +405,16 @@ def test_encode_batch_with_example_with_empty_first_elem():
405405
assert encoded_batch == {"x": [[[0], [1]], [[], [1]]]}
406406

407407

408+
def test_encode_column_dict_with_none():
409+
features = Features(
410+
{
411+
"x": {"a": ClassLabel(names=["a", "b"]), "b": Value("int32")},
412+
}
413+
)
414+
encoded_column = features.encode_column([{"a": "a", "b": 1}, None], "x")
415+
assert encoded_column == [{"a": 0, "b": 1}, None]
416+
417+
408418
@pytest.mark.parametrize(
409419
"feature",
410420
[

0 commit comments

Comments
 (0)