Skip to content

Commit afe875a

Browse files
Support features in metadata configs (#7182)
* Test metadata configs with features
* Support features as metadata configs
1 parent 3822805 commit afe875a

File tree

2 files changed

+31
-2
lines changed

2 files changed

+31
-2
lines changed

src/datasets/utils/metadata.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
from huggingface_hub import DatasetCardData
1010

1111
from ..config import METADATA_CONFIGS_FIELD
12+
from ..features import Features
1213
from ..info import DatasetInfo, DatasetInfosDict
1314
from ..naming import _split_re
1415
from ..utils.logging import get_logger
@@ -152,8 +153,12 @@ def from_dataset_card_data(cls, dataset_card_data: DatasetCardData) -> "Metadata
152153
cls._raise_if_data_files_field_not_valid(metadata_config)
153154
return cls(
154155
{
155-
config["config_name"]: {param: value for param, value in config.items() if param != "config_name"}
156-
for config in metadata_configs
156+
config.pop("config_name"): {
157+
param: value if param != "features" else Features._from_yaml_list(value)
158+
for param, value in config.items()
159+
}
160+
for metadata_config in metadata_configs
161+
if (config := metadata_config.copy())
157162
}
158163
)
159164
return cls()

tests/test_metadata_util.py

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
from huggingface_hub import DatasetCard, DatasetCardData
1010

1111
from datasets.config import METADATA_CONFIGS_FIELD
12+
from datasets.features import Features, Value
1213
from datasets.info import DatasetInfo
1314
from datasets.utils.metadata import MetadataConfigs
1415

@@ -93,6 +94,21 @@ def _dedent(string: str) -> str:
9394
"""
9495

9596

97+
README_METADATA_WITH_FEATURES = f"""\
98+
---
99+
{METADATA_CONFIGS_FIELD}:
100+
- config_name: default
101+
features:
102+
- name: id
103+
dtype: int64
104+
- name: name
105+
dtype: string
106+
- name: score
107+
dtype: float64
108+
---
109+
"""
110+
111+
96112
EXPECTED_METADATA_SINGLE_CONFIG = {"custom": {"data_dir": "v1", "drop_labels": True}}
97113
EXPECTED_METADATA_TWO_CONFIGS_DEFAULT_FLAG = {
98114
"v1": {"data_dir": "v1", "drop_labels": True},
@@ -102,6 +118,13 @@ def _dedent(string: str) -> str:
102118
"custom": {"data_dir": "custom", "drop_labels": True},
103119
"default": {"data_dir": "data", "drop_labels": False},
104120
}
121+
EXPECTED_METADATA_WITH_FEATURES = {
122+
"default": {
123+
"features": Features(
124+
{"id": Value(dtype="int64"), "name": Value(dtype="string"), "score": Value(dtype="float64")}
125+
)
126+
}
127+
}
105128

106129

107130
@pytest.fixture
@@ -227,6 +250,7 @@ def test_from_yaml_string(self):
227250
(README_METADATA_SINGLE_CONFIG, EXPECTED_METADATA_SINGLE_CONFIG, "custom"),
228251
(README_METADATA_TWO_CONFIGS_WITH_DEFAULT_FLAG, EXPECTED_METADATA_TWO_CONFIGS_DEFAULT_FLAG, "v2"),
229252
(README_METADATA_TWO_CONFIGS_WITH_DEFAULT_NAME, EXPECTED_METADATA_TWO_CONFIGS_DEFAULT_NAME, "default"),
253+
(README_METADATA_WITH_FEATURES, EXPECTED_METADATA_WITH_FEATURES, "default"),
230254
],
231255
)
232256
def test_metadata_configs_dataset_card_data(

0 commit comments

Comments
 (0)