Skip to content

Commit 50bc312

Browse files
Remove license tag file and validation (#5004)
* Delete standard_licenses.tsv file
* Remove license validation
1 parent 420e981 commit 50bc312

File tree

2 files changed

+0
-78
lines changed

2 files changed

+0
-78
lines changed

src/datasets/utils/metadata.py

Lines changed: 0 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -41,20 +41,10 @@ def load_json_resource(resource: str) -> Tuple[Any, str]:
4141
return json.loads(content), f"{BASE_REF_URL}/resources/{resource}"
4242

4343

44-
def load_tsv_licenses(resource: str) -> Tuple[Any, str]:
45-
content = pkg_resources.read_text(resources, resource)
46-
licenses = {
47-
line.split("\t")[1].strip(): line.split("\t")[0].strip() for line in content.splitlines() if "\t" in line
48-
}
49-
return licenses, f"{BASE_REF_URL}/resources/{resource}"
50-
51-
5244
# Source of languages.json:
5345
# https://datahub.io/core/language-codes/r/ietf-language-tags.csv
5446
# Language names were obtained with langcodes: https://github.com/LuminosoInsight/langcodes
5547
known_language_codes, known_language_codes_url = load_json_resource("languages.json")
56-
# standard_licenses.tsv is to be kept in sync with the same file in `moon-landing` and `hub-docs`
57-
known_licenses, known_licenses_url = load_tsv_licenses("standard_licenses.tsv")
5848
known_task_ids, known_task_ids_url = load_json_resource("tasks.json")
5949
known_creators, known_creators_url = load_json_resource("creators.json")
6050
known_size_categories, known_size_categories_url = load_json_resource("size_categories.json")
@@ -270,7 +260,6 @@ def validate(self):
270260
)
271261
self.language_creators, language_creators_errors = self.validate_language_creators(self.language_creators)
272262
self.language, language_errors = self.validate_language_codes(self.language or self.languages)
273-
self.license, license_errors = self.validate_licences(self.license or self.licenses)
274263
self.multilinguality, multilinguality_errors = self.validate_mulitlinguality(self.multilinguality)
275264
self.size_categories, size_categories_errors = self.validate_size_catgeories(self.size_categories)
276265
self.source_datasets, source_datasets_errors = self.validate_source_datasets(self.source_datasets)
@@ -284,7 +273,6 @@ def validate(self):
284273
errors = {
285274
"annotations_creators": annotations_creators_errors,
286275
"language_creators": language_creators_errors,
287-
"license": license_errors,
288276
"multilinguality": multilinguality_errors,
289277
"size_categories": size_categories_errors,
290278
"source_datasets": source_datasets_errors,
@@ -394,16 +382,6 @@ def validate_language_codes(languages: Union[List[str], Dict[str, List[str]]]) -
394382
lambda lang: lang == "unknown",
395383
)
396384

397-
@staticmethod
398-
def validate_licences(licenses: Union[List[str], Dict[str, List[str]]]) -> ValidatorOutput:
399-
validated, error = tagset_validator(
400-
licenses,
401-
list(known_licenses.keys()),
402-
"license",
403-
known_licenses_url,
404-
)
405-
return validated, error
406-
407385
@staticmethod
408386
def validate_task_categories(task_categories: Union[List[str], Dict[str, List[str]]]) -> ValidatorOutput:
409387
# TODO: we're currently ignoring all values starting with 'other' as our task taxonomy is bound to change

src/datasets/utils/resources/standard_licenses.tsv

Lines changed: 0 additions & 56 deletions
This file was deleted.

0 commit comments

Comments (0)