huggingface · lhoestq · May 20, 2022 · May 18, 2022 · May 18, 2022 · May 18, 2022
diff --git a/.github/hub/update_hub_repositories.py b/.github/hub/update_hub_repositories.py
@@ -105,17 +105,6 @@ def check_authorizations(user_info: dict):
         )
 
 
-def apply_hacks_for_moon_landing(dataset_repo_path: Path):
-    if (dataset_repo_path / "README.md").is_file():
-        with (dataset_repo_path / "README.md").open() as f:
-            readme_content = f.read()
-        if readme_content.count("---\n") > 1:
-            _, tags, content = readme_content.split("---\n", 2)
-            tags = tags.replace("\nlicense:", "\nlicenses:").replace(".", "-").replace("$", "%")
-            with (dataset_repo_path / "README.md").open("w") as f:
-                f.write("---\n".join(["", tags, content]))
-
-
 class update_main:
     def __init__(
         self,

diff --git a/datasets/ade_corpus_v2/README.md b/datasets/ade_corpus_v2/README.md
@@ -10,28 +10,17 @@ licenses:
 multilinguality:
 - monolingual
 size_categories:
-  Ade_corpus_v2_classification:
-  - 10K<n<100K
-  Ade_corpus_v2_drug_ade_relation:
-  - 1K<n<10K
-  Ade_corpus_v2_drug_dosage_relation:
-  - n<1K
+- 10K<n<100K
+- 1K<n<10K
+- n<1K
 source_datasets:
 - original
 task_categories:
-  Ade_corpus_v2_classification:
-  - text-classification
-  Ade_corpus_v2_drug_ade_relation:
-  - token-classification
-  Ade_corpus_v2_drug_dosage_relation:
-  - token-classification
+- text-classification
+- token-classification
 task_ids:
-  Ade_corpus_v2_classification:
-  - fact-checking
-  Ade_corpus_v2_drug_ade_relation:
-  - coreference-resolution
-  Ade_corpus_v2_drug_dosage_relation:
-  - coreference-resolution
+- coreference-resolution
+- fact-checking
 paperswithcode_id: null
 pretty_name: Adverse Drug Reaction Data v2
 train-eval-index:
@@ -82,6 +71,10 @@ train-eval-index:
       name: Recall weighted
       args:
         average: weighted
+configs:
+- Ade_corpus_v2_classification
+- Ade_corpus_v2_drug_ade_relation
+- Ade_corpus_v2_drug_dosage_relation
 ---
 
 # Dataset Card for Adverse Drug Reaction Data v2

diff --git a/datasets/adv_glue/README.md b/datasets/adv_glue/README.md
@@ -15,31 +15,19 @@ size_categories:
 source_datasets:
 - extended|glue
 task_categories:
-  adv_mnli:
-  - text-classification
-  adv_mnli_mismatched:
-  - text-classification
-  adv_qnli:
-  - text-classification
-  adv_qqp:
-  - text-classification
-  adv_rte:
-  - text-classification
-  adv_sst2:
-  - text-classification
+- text-classification
 task_ids:
-  adv_mnli:
-  - natural-language-inference
-  adv_mnli_mismatched:
-  - natural-language-inference
-  adv_qnli:
-  - text-classification-other-qa-nli
-  adv_qqp:
-  - text-classification-other-paraphrase-identification
-  adv_rte:
-  - natural-language-inference
-  adv_sst2:
-  - sentiment-classification
+- natural-language-inference
+- sentiment-classification
+- text-classification-other-paraphrase-identification
+- text-classification-other-qa-nli
+configs:
+- adv_mnli
+- adv_mnli_mismatched
+- adv_qnli
+- adv_qqp
+- adv_rte
+- adv_sst2
 ---
 
 # Dataset Card for Adversarial GLUE

diff --git a/datasets/alt/README.md b/datasets/alt/README.md
@@ -23,20 +23,8 @@ multilinguality:
 - multilingual
 - translation
 size_categories:
-  alt-en:
-  - 10K<n<100K
-  alt-jp:
-  - 10K<n<100K
-  alt-km:
-  - 10K<n<100K
-  alt-my:
-  - 10K<n<100K
-  alt-my-transliteration:
-  - 10K<n<100K
-  alt-my-west-transliteration:
-  - 100K<n<1M
-  alt-parallel:
-  - 10K<n<100K
+- 100K<n<1M
+- 10K<n<100K
 source_datasets:
 - original
 task_categories:
@@ -46,6 +34,14 @@ task_ids:
 - parsing
 paperswithcode_id: alt
 pretty_name: Asian Language Treebank
+configs:
+- alt-en
+- alt-jp
+- alt-km
+- alt-my
+- alt-my-transliteration
+- alt-my-west-transliteration
+- alt-parallel
 ---
 
 # Dataset Card for Asian Language Treebank (ALT)

diff --git a/datasets/amazon_reviews_multi/README.md b/datasets/amazon_reviews_multi/README.md
@@ -4,57 +4,20 @@ annotations_creators:
 language_creators:
 - found
 languages:
-  all_languages:
-  - de
-  - en
-  - es
-  - fr
-  - ja
-  - zh
-  de:
-  - de
-  en:
-  - en
-  es:
-  - es
-  fr:
-  - fr
-  ja:
-  - ja
-  zh:
-  - zh
+- de
+- en
+- es
+- fr
+- ja
+- zh
 licenses:
 - other-amazon-license
 multilinguality:
-  all_languages:
-  - multilingual
-  de:
-  - monolingual
-  en:
-  - monolingual
-  es:
-  - monolingual
-  fr:
-  - monolingual
-  ja:
-  - monolingual
-  zh:
-  - monolingual
+- monolingual
+- multilingual
 size_categories:
-  all_languages:
-  - 1M<n<10M
-  de:
-  - 100K<n<1M
-  en:
-  - 100K<n<1M
-  es:
-  - 100K<n<1M
-  fr:
-  - 100K<n<1M
-  ja:
-  - 100K<n<1M
-  zh:
-  - 100K<n<1M
+- 100K<n<1M
+- 1M<n<10M
 source_datasets:
 - original
 task_categories:
@@ -71,6 +34,14 @@ task_ids:
 - topic-classification
 paperswithcode_id: null
 pretty_name: The Multilingual Amazon Reviews Corpus
+configs:
+- all_languages
+- de
+- en
+- es
+- fr
+- ja
+- zh
 ---
 
 # Dataset Card for The Multilingual Amazon Reviews Corpus

diff --git a/datasets/arabic_billion_words/README.md b/datasets/arabic_billion_words/README.md
@@ -10,26 +10,9 @@ licenses:
 multilinguality:
 - monolingual
 size_categories:
-  Alittihad:
-  - 100K<n<1M
-  Almasryalyoum:
-  - 100K<n<1M
-  Almustaqbal:
-  - 100K<n<1M
-  Alqabas:
-  - 100K<n<1M
-  Echoroukonline:
-  - 100K<n<1M
-  Ryiadh:
-  - 100K<n<1M
-  Sabanews:
-  - 10K<n<100K
-  SaudiYoum:
-  - 100K<n<1M
-  Techreen:
-  - 100K<n<1M
-  Youm7:
-  - 1M<n<10M
+- 100K<n<1M
+- 10K<n<100K
+- 1M<n<10M
 source_datasets:
 - original
 task_categories:
@@ -40,6 +23,17 @@ task_ids:
 - masked-language-modeling
 paperswithcode_id: null
 pretty_name: Arabic Billion Words
+configs:
+- Alittihad
+- Almasryalyoum
+- Almustaqbal
+- Alqabas
+- Echoroukonline
+- Ryiadh
+- Sabanews
+- SaudiYoum
+- Techreen
+- Youm7
 ---
 
 # Dataset Card for Arabic Billion Words Corpus

diff --git a/datasets/asset/README.md b/datasets/asset/README.md
@@ -15,17 +15,16 @@ source_datasets:
 - original
 - extended|other-turkcorpus
 task_categories:
-  ratings:
-  - text-classification
-  simplification:
-  - text2text-generation
+- text-classification
+- text2text-generation
 task_ids:
-  ratings:
-  - text-classification-other-simplification-evaluation
-  simplification:
-  - text-simplification
+- text-classification-other-simplification-evaluation
+- text-simplification
 paperswithcode_id: asset
 pretty_name: ASSET
+configs:
+- ratings
+- simplification
 ---
 
 # Dataset Card for ASSET

diff --git a/datasets/autshumato/README.md b/datasets/autshumato/README.md
@@ -4,46 +4,31 @@ annotations_creators:
 language_creators:
 - expert-generated
 languages:
-  autshumato-en-ts-manual:
-  - en
-  - ts
-  autshumato-en-tn:
-  - en
-  - tn
-  autshumato-en-ts:
-  - en
-  - ts
-  autshumato-en-zu:
-  - en
-  - zu
-  autshumato-tn:
-  - tn
-  autshumato-ts:
-  - ts
+- en
+- tn
+- ts
+- zu
 licenses:
 - cc-by-2.5
 multilinguality:
 - multilingual
 size_categories:
-  autshumato-en-tn:
-  - 100K<n<1M
-  autshumato-en-ts:
-  - 100K<n<1M
-  autshumato-en-ts-manual:
-  - 10K<n<100K
-  autshumato-en-zu:
-  - 10K<n<100K
-  autshumato-tn:
-  - 10K<n<100K
-  autshumato-ts:
-  - 10K<n<100K
+- 100K<n<1M
+- 10K<n<100K
 source_datasets:
 - original
 task_categories:
 - translation
 task_ids: []
 paperswithcode_id: null
 pretty_name: autshumato
+configs:
+- autshumato-en-tn
+- autshumato-en-ts
+- autshumato-en-ts-manual
+- autshumato-en-zu
+- autshumato-tn
+- autshumato-ts
 ---
 
 # Dataset Card Creation Guide