Skip to content

Commit 9424188

Browse files
committed
set default values for deprecated languages and licenses
1 parent afa1a16 commit 9424188

2 files changed

Lines changed: 25 additions & 8 deletions

File tree

src/datasets/utils/metadata.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import json
22
import logging
33
import re
4+
import warnings
45
from collections import Counter
56
from dataclasses import dataclass, fields
67
from pathlib import Path
@@ -219,9 +220,7 @@ class DatasetMetadata:
219220
annotations_creators: List[str]
220221
language_creators: Union[EmptyList, List[str]]
221222
language: Union[EmptyList, List[str]]
222-
languages: Union[EmptyList, List[str]] # deprecated
223223
license: List[str]
224-
licenses: List[str] # deprecated
225224
multilinguality: List[str]
226225
pretty_name: str
227226
size_categories: List[str]
@@ -233,10 +232,19 @@ class DatasetMetadata:
233232
configs: Optional[List[str]] = None
234233
extra_gated_fields: Optional[Dict] = None
235234
extra_gated_prompt: Optional[str] = None
235+
licenses: Optional[Union[EmptyList, List[str]]] = None # deprecated
236+
languages: Optional[Union[EmptyList, List[str]]] = None # deprecated
236237

237238
# class attributes
238239
_FIELDS_WITH_DASHES: ClassVar[set] = {"train_eval_index"} # train-eval-index in the YAML metadata
239240
_ALLOWED_YAML_KEYS: ClassVar[set] = set() # populated later
241+
_DEPRECATED_YAML_KEYS = ["licenses", "languages"]
242+
243+
def __post_init__(self):
244+
if self.licenses is not None:
245+
warnings.warn("The 'licenses' YAML field is deprecated, please use 'license' instead.")
246+
if self.languages is not None:
247+
warnings.warn("The 'languages' YAML field is deprecated, please use 'language' instead.")
240248

241249
def validate(self):
242250
validate_metadata_type(metadata_dict=vars(self))
@@ -272,9 +280,9 @@ def validate(self):
272280
}
273281

274282
exception_msg_dict = dict()
275-
for field, errs in errors.items():
283+
for yaml_field, errs in errors.items():
276284
if errs is not None:
277-
exception_msg_dict[field] = errs
285+
exception_msg_dict[yaml_field] = errs
278286
if len(exception_msg_dict) > 0:
279287
raise TypeError(
280288
"Could not validate the metadata, found the following errors:\n"

tests/test_metadata_util.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ def test_metadata_dict_from_readme(self):
173173
with open(path, "w+") as readme_file:
174174
readme_file.write(README_YAML)
175175
metadata_dict = metadata_dict_from_readme(path)
176-
self.assertDictEqual(metadata_dict, {"languages": ["zh", "en"], "task_ids": ["sentiment-classification"]})
176+
self.assertDictEqual(metadata_dict, {"language": ["zh", "en"], "task_ids": ["sentiment-classification"]})
177177

178178
with open(path, "w+") as readme_file:
179179
readme_file.write(README_EMPTY_YAML)
@@ -188,7 +188,15 @@ def test_metadata_dict_from_readme(self):
188188
def test_from_yaml_string(self):
189189

190190
default_optional_keys = {
191-
field.name: field.default for field in fields(DatasetMetadata) if type(field.default) is _MISSING_TYPE
191+
field.name: field.default
192+
for field in fields(DatasetMetadata)
193+
if type(field.default) is _MISSING_TYPE and field.name not in DatasetMetadata._DEPRECATED_YAML_KEYS
194+
}
195+
196+
default_deprecated_keys = {
197+
field.name: field.default
198+
for field in fields(DatasetMetadata)
199+
if field.name in DatasetMetadata._DEPRECATED_YAML_KEYS
192200
}
193201

194202
valid_yaml_string = _dedent(
@@ -487,10 +495,11 @@ def test_from_yaml_string(self):
487495
metadata_dict = asdict(metadata)
488496
expected = {
489497
**default_optional_keys,
498+
**default_deprecated_keys,
490499
"annotations_creators": ["found"],
491500
"language_creators": ["found"],
492-
"languages": ["en"],
493-
"licenses": ["unknown"],
501+
"language": ["en"],
502+
"license": ["unknown"],
494503
"multilinguality": ["monolingual"],
495504
"pretty_name": "Test Dataset",
496505
"size_categories": ["10K<n<100K"],

0 commit comments

Comments
 (0)