|
7 | 7 | import pytest |
8 | 8 |
|
9 | 9 | from datasets import Sequence, Value |
10 | | -from datasets.features.features import ClassLabel, Features |
| 10 | +from datasets.features.features import ClassLabel, Features, Image |
11 | 11 | from datasets.table import ( |
12 | 12 | ConcatenationTable, |
13 | 13 | InMemoryTable, |
|
20 | 20 | _memory_mapped_arrow_table_from_file, |
21 | 21 | cast_array_to_feature, |
22 | 22 | concat_tables, |
| 23 | + embed_array_storage, |
| 24 | + embed_table_storage, |
23 | 25 | inject_arrow_table_documentation, |
| 26 | + table_cast, |
24 | 27 | ) |
25 | 28 |
|
26 | 29 | from .utils import assert_arrow_memory_doesnt_increase, assert_arrow_memory_increases, slow |
@@ -1045,3 +1048,29 @@ def test_cast_array_to_features_to_null_type(): |
1045 | 1048 | arr = pa.array([[None, 1]]) |
1046 | 1049 | with pytest.raises(TypeError): |
1047 | 1050 | cast_array_to_feature(arr, Sequence(Value("null"))) |
| 1051 | + |
| 1052 | + |
def test_embed_array_storage(image_file):
    """Check `embed_array_storage` on a flat array of Image storage structs.

    The input holds a single row with only `path` set (to the `image_file`
    fixture). After embedding, that row is expected to have `path` set to
    None and `bytes` holding a `bytes` object.
    """
    path_only_storage = pa.array([{"bytes": None, "path": image_file}], type=Image.pa_type)
    first_row = embed_array_storage(path_only_storage, Image()).to_pylist()[0]
    assert first_row["path"] is None
    assert isinstance(first_row["bytes"], bytes)
| 1058 | + |
| 1059 | + |
def test_embed_array_storage_nested(image_file):
    """Check `embed_array_storage` when the Image storage is nested.

    Two layouts are exercised: an Image inside a list and an Image inside a
    struct field named "foo". In both, the embedded item is expected to have
    `path` set to None and `bytes` holding a `bytes` object.
    """
    image_storage = {"bytes": None, "path": image_file}

    # Image nested one level down inside a list.
    list_array = pa.array([[image_storage]], type=pa.list_(Image.pa_type))
    list_item = embed_array_storage(list_array, [Image()]).to_pylist()[0][0]
    assert list_item["path"] is None
    assert isinstance(list_item["bytes"], bytes)

    # Image nested as the "foo" field of a struct.
    struct_array = pa.array([{"foo": image_storage}], type=pa.struct({"foo": Image.pa_type}))
    struct_item = embed_array_storage(struct_array, {"foo": Image()}).to_pylist()[0]["foo"]
    assert struct_item["path"] is None
    assert isinstance(struct_item["bytes"], bytes)
| 1069 | + |
| 1070 | + |
def test_embed_table_storage(image_file):
    """Check `embed_table_storage` on a table with a single Image column.

    A one-row table of file paths is cast to the Image schema and then
    embedded. The resulting "image" cell is expected to have `path` set to
    None and `bytes` holding a `bytes` object.
    """
    image_features = Features({"image": Image()})
    path_table = pa.table({"image": [image_file]})
    image_table = table_cast(path_table, image_features.arrow_schema)
    first_image = embed_table_storage(image_table).to_pydict()["image"][0]
    assert first_image["path"] is None
    assert isinstance(first_image["bytes"], bytes)
0 commit comments