Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 0 additions & 11 deletions .github/hub/update_hub_repositories.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,17 +105,6 @@ def check_authorizations(user_info: dict):
)


def apply_hacks_for_moon_landing(dataset_repo_path: Path) -> None:
    """Rewrite the YAML front matter of a dataset's ``README.md`` in place.

    The front matter is the text between the first two ``---`` lines of the
    file. Inside that section only:

    * a ``license:`` key at the start of a line is renamed to ``licenses:``,
    * every ``.`` becomes ``-``,
    * every ``$`` becomes ``%``.

    The body of the README (everything after the second ``---`` line) is
    written back unchanged.

    Nothing happens when ``README.md`` does not exist, when it does not start
    with a ``---`` line, or when it has fewer than two ``---`` lines.

    Args:
        dataset_repo_path: Directory that may contain a ``README.md`` file.
    """
    readme_path = dataset_repo_path / "README.md"
    if not readme_path.is_file():
        return

    # Dataset cards routinely contain non-ASCII text; decode explicitly rather
    # than relying on the platform's default locale encoding.
    readme_content = readme_path.read_text(encoding="utf-8")

    # Front matter must open the file. Without this check, a README that only
    # contains Markdown horizontal rules ("---") would lose everything before
    # the first rule and have the text between the rules rewritten.
    if not readme_content.startswith("---\n") or readme_content.count("---\n") < 2:
        return

    _, tags, content = readme_content.split("---\n", 2)
    # NOTE(review): the "\nlicense:" pattern needs a preceding newline, so a
    # `license:` key on the very first line of the front matter is not renamed.
    # Kept as-is; confirm whether that is intended.
    tags = tags.replace("\nlicense:", "\nlicenses:").replace(".", "-").replace("$", "%")
    readme_path.write_text("---\n".join(["", tags, content]), encoding="utf-8")


class update_main:
def __init__(
self,
Expand Down
29 changes: 11 additions & 18 deletions datasets/ade_corpus_v2/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,28 +10,17 @@ licenses:
multilinguality:
- monolingual
size_categories:
Ade_corpus_v2_classification:
- 10K<n<100K
Ade_corpus_v2_drug_ade_relation:
- 1K<n<10K
Ade_corpus_v2_drug_dosage_relation:
- n<1K
- 10K<n<100K
- 1K<n<10K
- n<1K
source_datasets:
- original
task_categories:
Ade_corpus_v2_classification:
- text-classification
Ade_corpus_v2_drug_ade_relation:
- token-classification
Ade_corpus_v2_drug_dosage_relation:
- token-classification
- text-classification
- token-classification
task_ids:
Ade_corpus_v2_classification:
- fact-checking
Ade_corpus_v2_drug_ade_relation:
- coreference-resolution
Ade_corpus_v2_drug_dosage_relation:
- coreference-resolution
- coreference-resolution
- fact-checking
paperswithcode_id: null
pretty_name: Adverse Drug Reaction Data v2
train-eval-index:
Expand Down Expand Up @@ -82,6 +71,10 @@ train-eval-index:
name: Recall weighted
args:
average: weighted
configs:
- Ade_corpus_v2_classification
- Ade_corpus_v2_drug_ade_relation
- Ade_corpus_v2_drug_dosage_relation
---

# Dataset Card for Adverse Drug Reaction Data v2
Expand Down
36 changes: 12 additions & 24 deletions datasets/adv_glue/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,31 +15,19 @@ size_categories:
source_datasets:
- extended|glue
task_categories:
adv_mnli:
- text-classification
adv_mnli_mismatched:
- text-classification
adv_qnli:
- text-classification
adv_qqp:
- text-classification
adv_rte:
- text-classification
adv_sst2:
- text-classification
- text-classification
task_ids:
adv_mnli:
- natural-language-inference
adv_mnli_mismatched:
- natural-language-inference
adv_qnli:
- text-classification-other-qa-nli
adv_qqp:
- text-classification-other-paraphrase-identification
adv_rte:
- natural-language-inference
adv_sst2:
- sentiment-classification
- natural-language-inference
- sentiment-classification
- text-classification-other-paraphrase-identification
- text-classification-other-qa-nli
configs:
- adv_mnli
- adv_mnli_mismatched
- adv_qnli
- adv_qqp
- adv_rte
- adv_sst2
---

# Dataset Card for Adversarial GLUE
Expand Down
24 changes: 10 additions & 14 deletions datasets/alt/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,8 @@ multilinguality:
- multilingual
- translation
size_categories:
alt-en:
- 10K<n<100K
alt-jp:
- 10K<n<100K
alt-km:
- 10K<n<100K
alt-my:
- 10K<n<100K
alt-my-transliteration:
- 10K<n<100K
alt-my-west-transliteration:
- 100K<n<1M
alt-parallel:
- 10K<n<100K
- 100K<n<1M
- 10K<n<100K
source_datasets:
- original
task_categories:
Expand All @@ -46,6 +34,14 @@ task_ids:
- parsing
paperswithcode_id: alt
pretty_name: Asian Language Treebank
configs:
- alt-en
- alt-jp
- alt-km
- alt-my
- alt-my-transliteration
- alt-my-west-transliteration
- alt-parallel
---

# Dataset Card for Asian Language Treebank (ALT)
Expand Down
65 changes: 18 additions & 47 deletions datasets/amazon_reviews_multi/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,57 +4,20 @@ annotations_creators:
language_creators:
- found
languages:
all_languages:
- de
- en
- es
- fr
- ja
- zh
de:
- de
en:
- en
es:
- es
fr:
- fr
ja:
- ja
zh:
- zh
- de
- en
- es
- fr
- ja
- zh
licenses:
- other-amazon-license
multilinguality:
all_languages:
- multilingual
de:
- monolingual
en:
- monolingual
es:
- monolingual
fr:
- monolingual
ja:
- monolingual
zh:
- monolingual
- monolingual
- multilingual
size_categories:
all_languages:
- 1M<n<10M
de:
- 100K<n<1M
en:
- 100K<n<1M
es:
- 100K<n<1M
fr:
- 100K<n<1M
ja:
- 100K<n<1M
zh:
- 100K<n<1M
- 100K<n<1M
- 1M<n<10M
source_datasets:
- original
task_categories:
Expand All @@ -71,6 +34,14 @@ task_ids:
- topic-classification
paperswithcode_id: null
pretty_name: The Multilingual Amazon Reviews Corpus
configs:
- all_languages
- de
- en
- es
- fr
- ja
- zh
---

# Dataset Card for The Multilingual Amazon Reviews Corpus
Expand Down
34 changes: 14 additions & 20 deletions datasets/arabic_billion_words/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,26 +10,9 @@ licenses:
multilinguality:
- monolingual
size_categories:
Alittihad:
- 100K<n<1M
Almasryalyoum:
- 100K<n<1M
Almustaqbal:
- 100K<n<1M
Alqabas:
- 100K<n<1M
Echoroukonline:
- 100K<n<1M
Ryiadh:
- 100K<n<1M
Sabanews:
- 10K<n<100K
SaudiYoum:
- 100K<n<1M
Techreen:
- 100K<n<1M
Youm7:
- 1M<n<10M
- 100K<n<1M
- 10K<n<100K
- 1M<n<10M
source_datasets:
- original
task_categories:
Expand All @@ -40,6 +23,17 @@ task_ids:
- masked-language-modeling
paperswithcode_id: null
pretty_name: Arabic Billion Words
configs:
- Alittihad
- Almasryalyoum
- Almustaqbal
- Alqabas
- Echoroukonline
- Ryiadh
- Sabanews
- SaudiYoum
- Techreen
- Youm7
---

# Dataset Card for Arabic Billion Words Corpus
Expand Down
15 changes: 7 additions & 8 deletions datasets/asset/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,16 @@ source_datasets:
- original
- extended|other-turkcorpus
task_categories:
ratings:
- text-classification
simplification:
- text2text-generation
- text-classification
- text2text-generation
task_ids:
ratings:
- text-classification-other-simplification-evaluation
simplification:
- text-simplification
- text-classification-other-simplification-evaluation
- text-simplification
paperswithcode_id: asset
pretty_name: ASSET
configs:
- ratings
- simplification
---

# Dataset Card for ASSET
Expand Down
41 changes: 13 additions & 28 deletions datasets/autshumato/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,46 +4,31 @@ annotations_creators:
language_creators:
- expert-generated
languages:
autshumato-en-ts-manual:
- en
- ts
autshumato-en-tn:
- en
- tn
autshumato-en-ts:
- en
- ts
autshumato-en-zu:
- en
- zu
autshumato-tn:
- tn
autshumato-ts:
- ts
- en
- tn
- ts
- zu
licenses:
- cc-by-2.5
multilinguality:
- multilingual
size_categories:
autshumato-en-tn:
- 100K<n<1M
autshumato-en-ts:
- 100K<n<1M
autshumato-en-ts-manual:
- 10K<n<100K
autshumato-en-zu:
- 10K<n<100K
autshumato-tn:
- 10K<n<100K
autshumato-ts:
- 10K<n<100K
- 100K<n<1M
- 10K<n<100K
source_datasets:
- original
task_categories:
- translation
task_ids: []
paperswithcode_id: null
pretty_name: autshumato
configs:
- autshumato-en-tn
- autshumato-en-ts
- autshumato-en-ts-manual
- autshumato-en-zu
- autshumato-tn
- autshumato-ts
---

# Dataset Card Creation Guide
Expand Down
Loading