@@ -41,20 +41,10 @@ def load_json_resource(resource: str) -> Tuple[Any, str]:
4141 return json .loads (content ), f"{ BASE_REF_URL } /resources/{ resource } "
4242
4343
44- def load_tsv_licenses (resource : str ) -> Tuple [Any , str ]:
45- content = pkg_resources .read_text (resources , resource )
46- licenses = {
47- line .split ("\t " )[1 ].strip (): line .split ("\t " )[0 ].strip () for line in content .splitlines () if "\t " in line
48- }
49- return licenses , f"{ BASE_REF_URL } /resources/{ resource } "
50-
51-
5244# Source of languages.json:
5345# https://datahub.io/core/language-codes/r/ietf-language-tags.csv
5446# Language names were obtained with langcodes: https://github.com/LuminosoInsight/langcodes
5547known_language_codes , known_language_codes_url = load_json_resource ("languages.json" )
56- # standard_licenses.tsv is to be kept in sync with the same file in `moon-landing` and `hub-docs`
57- known_licenses , known_licenses_url = load_tsv_licenses ("standard_licenses.tsv" )
5848known_task_ids , known_task_ids_url = load_json_resource ("tasks.json" )
5949known_creators , known_creators_url = load_json_resource ("creators.json" )
6050known_size_categories , known_size_categories_url = load_json_resource ("size_categories.json" )
@@ -270,7 +260,6 @@ def validate(self):
270260 )
271261 self .language_creators , language_creators_errors = self .validate_language_creators (self .language_creators )
272262 self .language , language_errors = self .validate_language_codes (self .language or self .languages )
273- self .license , license_errors = self .validate_licences (self .license or self .licenses )
274263 self .multilinguality , multilinguality_errors = self .validate_mulitlinguality (self .multilinguality )
275264 self .size_categories , size_categories_errors = self .validate_size_catgeories (self .size_categories )
276265 self .source_datasets , source_datasets_errors = self .validate_source_datasets (self .source_datasets )
@@ -284,7 +273,6 @@ def validate(self):
284273 errors = {
285274 "annotations_creators" : annotations_creators_errors ,
286275 "language_creators" : language_creators_errors ,
287- "license" : license_errors ,
288276 "multilinguality" : multilinguality_errors ,
289277 "size_categories" : size_categories_errors ,
290278 "source_datasets" : source_datasets_errors ,
@@ -394,16 +382,6 @@ def validate_language_codes(languages: Union[List[str], Dict[str, List[str]]]) -
394382 lambda lang : lang == "unknown" ,
395383 )
396384
397- @staticmethod
398- def validate_licences (licenses : Union [List [str ], Dict [str , List [str ]]]) -> ValidatorOutput :
399- validated , error = tagset_validator (
400- licenses ,
401- list (known_licenses .keys ()),
402- "license" ,
403- known_licenses_url ,
404- )
405- return validated , error
406-
407385 @staticmethod
408386 def validate_task_categories (task_categories : Union [List [str ], Dict [str , List [str ]]]) -> ValidatorOutput :
409387 # TODO: we're currently ignoring all values starting with 'other' as our task taxonomy is bound to change
0 commit comments