From 97a3db8f74ef509e9d97b89fa0c65d2da8a51622 Mon Sep 17 00:00:00 2001 From: Bhavitvya Malik Date: Sat, 22 May 2021 14:46:10 +0530 Subject: [PATCH 1/6] pretty_name first10 --- datasets/acronym_identification/README.md | 2 ++ datasets/ade_corpus_v2/README.md | 4 ++++ datasets/adversarial_qa/README.md | 7 ++++++- datasets/afrikaans_ner_corpus/README.md | 4 +++- datasets/ag_news/README.md | 4 +++- datasets/air_dialogue/README.md | 5 ++++- datasets/ajgt_twitter_ar/README.md | 4 +++- datasets/allegro_reviews/README.md | 8 +++++--- 8 files changed, 30 insertions(+), 8 deletions(-) diff --git a/datasets/acronym_identification/README.md b/datasets/acronym_identification/README.md index 82e51fef954..157f2474c20 100644 --- a/datasets/acronym_identification/README.md +++ b/datasets/acronym_identification/README.md @@ -17,6 +17,8 @@ task_categories: - structure-prediction task_ids: - structure-prediction-other-acronym-identification +pretty_name: + default: Acronym Identification Dataset --- # Dataset Card for Acronym Identification Dataset diff --git a/datasets/ade_corpus_v2/README.md b/datasets/ade_corpus_v2/README.md index 17c9d6a51c3..aaad6bd5a13 100644 --- a/datasets/ade_corpus_v2/README.md +++ b/datasets/ade_corpus_v2/README.md @@ -32,6 +32,10 @@ task_ids: - coreference-resolution Ade_corpus_v2_drug_dosage_relation: - coreference-resolution +pretty_name: + Ade_corpus_v2_classification: Adverse Drug Reaction Data v2 Ade_corpus_v2_classification + Ade_corpus_v2_drug_ade_relation: Adverse Drug Reaction Data v2 Ade_corpus_v2_drug_ade_relation + Ade_corpus_v2_drug_dosage_relation: Adverse Drug Reaction Data v2 Ade_corpus_v2_drug_dosage_relation --- # Dataset Card for Adverse Drug Reaction Data v2 diff --git a/datasets/adversarial_qa/README.md b/datasets/adversarial_qa/README.md index aa4b318da8b..25ec4298879 100644 --- a/datasets/adversarial_qa/README.md +++ b/datasets/adversarial_qa/README.md @@ -18,6 +18,11 @@ task_categories: task_ids: - extractive-qa - open-domain-qa +pretty_name: + adversarialQA: adversarialQA adversarialQA + dbert: adversarialQA dbert + dbidaf: adversarialQA dbidaf + droberta: adversarialQA droberta --- # Dataset Card for adversarialQA @@ -222,4 +227,4 @@ This dataset is distributed under [CC BY-SA 3.0](https://creativecommons.org/lic ``` ### Contributions -Thanks to [@maxbartolo](https://github.com/maxbartolo) for adding this dataset. \ No newline at end of file +Thanks to [@maxbartolo](https://github.com/maxbartolo) for adding this dataset. diff --git a/datasets/afrikaans_ner_corpus/README.md b/datasets/afrikaans_ner_corpus/README.md index 1133c68760e..0249cc83b61 100644 --- a/datasets/afrikaans_ner_corpus/README.md +++ b/datasets/afrikaans_ner_corpus/README.md @@ -17,6 +17,8 @@ task_categories: - structure-prediction task_ids: - named-entity-recognition +pretty_name: + afrikaans_ner_corpus: Afrikaans Ner Corpus --- # Dataset Card for Afrikaans Ner Corpus @@ -170,4 +172,4 @@ The data is under the [Creative Commons Attribution 2.5 South Africa License](ht ``` ### Contributions -Thanks to [@yvonnegitau](https://github.com/yvonnegitau) for adding this dataset. \ No newline at end of file +Thanks to [@yvonnegitau](https://github.com/yvonnegitau) for adding this dataset. diff --git a/datasets/ag_news/README.md b/datasets/ag_news/README.md index be503aff66c..65c5c3589b4 100644 --- a/datasets/ag_news/README.md +++ b/datasets/ag_news/README.md @@ -17,6 +17,8 @@ task_categories: - text-classification task_ids: - topic-classification +pretty_name: + default: '"ag_news"' --- # Dataset Card for "ag_news" @@ -183,4 +185,4 @@ The data fields are the same among all splits. ### Contributions -Thanks to [@jxmorris12](https://github.com/jxmorris12), [@thomwolf](https://github.com/thomwolf), [@lhoestq](https://github.com/lhoestq), [@lewtun](https://github.com/lewtun) for adding this dataset. \ No newline at end of file +Thanks to [@jxmorris12](https://github.com/jxmorris12), [@thomwolf](https://github.com/thomwolf), [@lhoestq](https://github.com/lhoestq), [@lewtun](https://github.com/lewtun) for adding this dataset. diff --git a/datasets/air_dialogue/README.md b/datasets/air_dialogue/README.md index 1c702cac975..18b976c1a05 100644 --- a/datasets/air_dialogue/README.md +++ b/datasets/air_dialogue/README.md @@ -20,6 +20,9 @@ task_ids: - conditional-text-generation-other-dialogue-generation - dialogue-modeling - language-modeling +pretty_name: + air_dialogue_data: air_dialogue air_dialogue_data + air_dialogue_kb: air_dialogue air_dialogue_kb --- # Dataset Card for air_dialogue @@ -198,4 +201,4 @@ cc-by-nc-4.0 ### Contributions -Thanks to [@skyprince999](https://github.com/skyprince999) for adding this dataset. \ No newline at end of file +Thanks to [@skyprince999](https://github.com/skyprince999) for adding this dataset. diff --git a/datasets/ajgt_twitter_ar/README.md b/datasets/ajgt_twitter_ar/README.md index a1297d1f81a..74027f2f08d 100644 --- a/datasets/ajgt_twitter_ar/README.md +++ b/datasets/ajgt_twitter_ar/README.md @@ -17,6 +17,8 @@ task_categories: - text-classification task_ids: - sentiment-classification +pretty_name: + plain_text: MetRec --- # Dataset Card for MetRec @@ -142,4 +144,4 @@ The dataset does not contain any additional annotations. ### Contributions -Thanks to [@zaidalyafeai](https://github.com/zaidalyafeai), [@lhoestq](https://github.com/lhoestq) for adding this dataset. \ No newline at end of file +Thanks to [@zaidalyafeai](https://github.com/zaidalyafeai), [@lhoestq](https://github.com/lhoestq) for adding this dataset. diff --git a/datasets/allegro_reviews/README.md b/datasets/allegro_reviews/README.md index 3eb329bd1d4..4f3fb6232e1 100644 --- a/datasets/allegro_reviews/README.md +++ b/datasets/allegro_reviews/README.md @@ -17,9 +17,11 @@ task_categories: - text-scoring task_ids: - sentiment-scoring +pretty_name: + default: Allegro Reviews --- -# Dataset Card for [Dataset Name] +# Dataset Card for Allegro Reviews ## Table of Contents - [Dataset Description](#dataset-description) @@ -77,7 +79,7 @@ Polish ### Data Instances -Two tsv files (train, dev) with two columns (text, rating) and one (test) with just one (text). +Two tsv files (train, dev) with two columns (text, rating) and one (test) with just one (text). ### Data Fields @@ -158,4 +160,4 @@ Dataset licensed under CC BY-SA 4.0 ### Contributions -Thanks to [@abecadel](https://github.com/abecadel) for adding this dataset. \ No newline at end of file +Thanks to [@abecadel](https://github.com/abecadel) for adding this dataset. From 48ef4e9e3365926102c44fb18f32a4e6b056bbb0 Mon Sep 17 00:00:00 2001 From: Bhavitvya Malik Date: Sat, 22 May 2021 14:57:20 +0530 Subject: [PATCH 2/6] minor addition --- datasets/acronym_identification/README.md | 2 +- datasets/air_dialogue/README.md | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/datasets/acronym_identification/README.md b/datasets/acronym_identification/README.md index 157f2474c20..45c49eeffa3 100644 --- a/datasets/acronym_identification/README.md +++ b/datasets/acronym_identification/README.md @@ -118,7 +118,7 @@ The training, validation, and test set contain `14,006`, `1,717`, and `1750` sen > This is unfortunate as rules are in general not able to capture all the diverse forms to express acronyms and their long forms in text. > Second, most of the existing datasets are in the medical domain, ignoring the challenges in other scientific domains. > In order to address these limitations this paper introduces two new datasets for Acronym Identification. -> Notably, our datasets are annotated by human to achieve high quality and have substantially larger numbers of examples than the existing AI datasets in the non-medical domain. +> Notably, our datasets are annotated by human to achieve high quality and have substantially larger numbers of examples than the existing AI datasets in the non-medical domain. ### Source Data diff --git a/datasets/air_dialogue/README.md b/datasets/air_dialogue/README.md index 18b976c1a05..21daf8da4e0 100644 --- a/datasets/air_dialogue/README.md +++ b/datasets/air_dialogue/README.md @@ -21,8 +21,8 @@ task_ids: - dialogue-modeling - language-modeling pretty_name: - air_dialogue_data: air_dialogue air_dialogue_data - air_dialogue_kb: air_dialogue air_dialogue_kb + air_dialogue_data: air_dialogue (air_dialogue_data) + air_dialogue_kb: air_dialogue (air_dialogue_kb) --- # Dataset Card for air_dialogue @@ -57,7 +57,7 @@ pretty_name: - **Repository:** https://github.com/google/airdialogue - **Paper:** https://www.aclweb.org/anthology/D18-1419/ - **Leaderboard:** https://worksheets.codalab.org/worksheets/0xa79833f4b3c24f4188cee7131b120a59 -- **Point of Contact:** [AirDialogue-Google](mailto:airdialogue@gmail.com) +- **Point of Contact:** [AirDialogue-Google](mailto:airdialogue@gmail.com) [Aakash Gupta](mailto:aakashg80@gmail.com) ### Dataset Summary From 9114a8263f9bc1a1a0f90c3a560f399c207c2121 Mon Sep 17 00:00:00 2001 From: Bhavitvya Malik Date: Sat, 22 May 2021 17:52:01 +0530 Subject: [PATCH 3/6] change values to pass tests --- datasets/acronym_identification/README.md | 3 +- datasets/ade_corpus_v2/README.md | 43 ++++++++++++----------- datasets/adversarial_qa/README.md | 12 ++++--- datasets/afrikaans_ner_corpus/README.md | 5 +-- datasets/ag_news/README.md | 3 +- datasets/air_dialogue/README.md | 6 ++-- datasets/ajgt_twitter_ar/README.md | 19 +++++----- datasets/allegro_reviews/README.md | 3 +- src/datasets/utils/metadata.py | 1 + 9 files changed, 55 insertions(+), 40 deletions(-) diff --git a/datasets/acronym_identification/README.md b/datasets/acronym_identification/README.md index 45c49eeffa3..a087a885799 100644 --- a/datasets/acronym_identification/README.md +++ b/datasets/acronym_identification/README.md @@ -18,7 +18,8 @@ task_categories: task_ids: - structure-prediction-other-acronym-identification pretty_name: - default: Acronym Identification Dataset + default: + - Acronym Identification Dataset --- # Dataset Card for Acronym Identification Dataset diff --git a/datasets/ade_corpus_v2/README.md b/datasets/ade_corpus_v2/README.md index aaad6bd5a13..ead1f40b71b 100644 --- a/datasets/ade_corpus_v2/README.md +++ b/datasets/ade_corpus_v2/README.md @@ -33,9 +33,12 @@ task_ids: Ade_corpus_v2_drug_dosage_relation: - coreference-resolution pretty_name: - Ade_corpus_v2_classification: Adverse Drug Reaction Data v2 Ade_corpus_v2_classification - Ade_corpus_v2_drug_ade_relation: Adverse Drug Reaction Data v2 Ade_corpus_v2_drug_ade_relation - Ade_corpus_v2_drug_dosage_relation: Adverse Drug Reaction Data v2 Ade_corpus_v2_drug_dosage_relation + Ade_corpus_v2_classification: + - Adverse Drug Reaction Data v2 (Ade_corpus_v2_classification) + Ade_corpus_v2_drug_ade_relation: + - Adverse Drug Reaction Data v2 (Ade_corpus_v2_drug_ade_relation) + Ade_corpus_v2_drug_dosage_relation: + - Adverse Drug Reaction Data v2 (Ade_corpus_v2_drug_dosage_relation) --- # Dataset Card for Adverse Drug Reaction Data v2 @@ -95,7 +98,7 @@ English #### Config - `Ade_corpus_v2_classification` ``` { - 'label': 1, + 'label': 1, 'text': 'Intravenous azithromycin-induced ototoxicity.' } @@ -104,21 +107,21 @@ English #### Config - `Ade_corpus_v2_drug_ade_relation` ``` -{ - 'drug': 'azithromycin', - 'effect': 'ototoxicity', +{ + 'drug': 'azithromycin', + 'effect': 'ototoxicity', 'indexes': { 'drug': { - 'end_char': [24], + 'end_char': [24], 'start_char': [12] - }, + }, 'effect': { - 'end_char': [44], + 'end_char': [44], 'start_char': [33] } - }, + }, 'text': 'Intravenous azithromycin-induced ototoxicity.' - + } ``` @@ -127,17 +130,17 @@ English ``` { - 'dosage': '4 times per day', - 'drug': 'insulin', + 'dosage': '4 times per day', + 'drug': 'insulin', 'indexes': { 'dosage': { - 'end_char': [56], + 'end_char': [56], 'start_char': [41] - }, + }, 'drug': { - 'end_char': [40], + 'end_char': [40], 'start_char': [33]} - }, + }, 'text': 'She continued to receive regular insulin 4 times per day over the following 3 years with only occasional hives.' } @@ -150,7 +153,7 @@ English - `text` - Input text. - `label` - Whether the adverse drug effect(ADE) related (1) or not (0). -- +- #### Config - `Ade_corpus_v2_drug_ade_relation` - `text` - Input text. @@ -175,7 +178,7 @@ English ### Data Splits | Train | -| ------ | +| ------ | | 23516 | ## Dataset Creation diff --git a/datasets/adversarial_qa/README.md b/datasets/adversarial_qa/README.md index 25ec4298879..07c2b5820ad 100644 --- a/datasets/adversarial_qa/README.md +++ b/datasets/adversarial_qa/README.md @@ -19,10 +19,14 @@ task_ids: - extractive-qa - open-domain-qa pretty_name: - adversarialQA: adversarialQA adversarialQA - dbert: adversarialQA dbert - dbidaf: adversarialQA dbidaf - droberta: adversarialQA droberta + adversarialQA: + - adversarialQA (adversarialQA) + dbert: + - adversarialQA (dbert) + dbidaf: + - adversarialQA (dbidaf) + droberta: + - adversarialQA (droberta) --- # Dataset Card for adversarialQA diff --git a/datasets/afrikaans_ner_corpus/README.md b/datasets/afrikaans_ner_corpus/README.md index 0249cc83b61..20b87e7192f 100644 --- a/datasets/afrikaans_ner_corpus/README.md +++ b/datasets/afrikaans_ner_corpus/README.md @@ -18,7 +18,8 @@ task_categories: task_ids: - named-entity-recognition pretty_name: - afrikaans_ner_corpus: Afrikaans Ner Corpus + afrikaans_ner_corpus: + - Afrikaans Ner Corpus --- # Dataset Card for Afrikaans Ner Corpus @@ -70,7 +71,7 @@ The language supported is Afrikaans. ### Data Instances -A data point consists of sentences seperated by empty line and tab-seperated tokens and tags. +A data point consists of sentences seperated by empty line and tab-seperated tokens and tags. {'id': '0', 'ner_tags': [0, 0, 0, 0, 0], 'tokens': ['Vertaling', 'van', 'die', 'inligting', 'in'] diff --git a/datasets/ag_news/README.md b/datasets/ag_news/README.md index 65c5c3589b4..9088e6d3911 100644 --- a/datasets/ag_news/README.md +++ b/datasets/ag_news/README.md @@ -18,7 +18,8 @@ task_categories: task_ids: - topic-classification pretty_name: - default: '"ag_news"' + default: + - '"ag_news"' --- # Dataset Card for "ag_news" diff --git a/datasets/air_dialogue/README.md b/datasets/air_dialogue/README.md index 21daf8da4e0..7e7f66342b3 100644 --- a/datasets/air_dialogue/README.md +++ b/datasets/air_dialogue/README.md @@ -21,8 +21,10 @@ task_ids: - dialogue-modeling - language-modeling pretty_name: - air_dialogue_data: air_dialogue (air_dialogue_data) - air_dialogue_kb: air_dialogue (air_dialogue_kb) + air_dialogue_data: + - air_dialogue (air_dialogue_data) + air_dialogue_kb: + - air_dialogue (air_dialogue_kb) --- # Dataset Card for air_dialogue diff --git a/datasets/ajgt_twitter_ar/README.md b/datasets/ajgt_twitter_ar/README.md index 74027f2f08d..c2fbe930659 100644 --- a/datasets/ajgt_twitter_ar/README.md +++ b/datasets/ajgt_twitter_ar/README.md @@ -18,7 +18,8 @@ task_categories: task_ids: - sentiment-classification pretty_name: - plain_text: MetRec + plain_text: + - MetRec --- # Dataset Card for MetRec @@ -60,7 +61,7 @@ Arabic Jordanian General Tweets (AJGT) Corpus consisted of 1,800 tweets annotate ### Supported Tasks and Leaderboards -The dataset was published on this [paper](https://link.springer.com/chapter/10.1007/978-3-319-60042-0_66). +The dataset was published on this [paper](https://link.springer.com/chapter/10.1007/978-3-319-60042-0_66). ### Languages @@ -70,7 +71,7 @@ The dataset is based on Arabic. ### Data Instances -A binary datset with with negative and positive sentiments. +A binary datset with with negative and positive sentiments. ### Data Fields @@ -78,11 +79,11 @@ A binary datset with with negative and positive sentiments. ### Data Splits -The dataset is not split. +The dataset is not split. -| | Tain | -|---------- | ------ | -|no split | 1,800 | +| | Tain | +|---------- | ------ | +|no split | 1,800 | ## Dataset Creation @@ -96,11 +97,11 @@ The dataset is not split. #### Initial Data Collection and Normalization -Contains 1,800 tweets collected from twitter. +Contains 1,800 tweets collected from twitter. #### Who are the source language producers? -From tweeter. +From tweeter. ### Annotations diff --git a/datasets/allegro_reviews/README.md b/datasets/allegro_reviews/README.md index 4f3fb6232e1..4a1d49d81bc 100644 --- a/datasets/allegro_reviews/README.md +++ b/datasets/allegro_reviews/README.md @@ -18,7 +18,8 @@ task_categories: task_ids: - sentiment-scoring pretty_name: - default: Allegro Reviews + default: + - Allegro Reviews --- # Dataset Card for Allegro Reviews diff --git a/src/datasets/utils/metadata.py b/src/datasets/utils/metadata.py index 9594be95d6b..8dc81f26b6d 100644 --- a/src/datasets/utils/metadata.py +++ b/src/datasets/utils/metadata.py @@ -119,6 +119,7 @@ class DatasetMetadata: source_datasets: List[str] task_categories: List[str] task_ids: List[str] + pretty_name: List[str] def __post_init__(self): validate_metadata_type(metadata_dict=vars(self)) From 9b8012543613a395132f40fee7fb8a376c80a2a9 Mon Sep 17 00:00:00 2001 From: Bhavitvya Malik Date: Mon, 21 Jun 2021 19:59:38 +0530 Subject: [PATCH 4/6] new format pretty_names --- datasets/acronym_identification/README.md | 1 + datasets/ade_corpus_v2/README.md | 4 ++++ datasets/adversarial_qa/README.md | 5 +++++ datasets/aeslc/README.md | 3 ++- datasets/afrikaans_ner_corpus/README.md | 1 + datasets/ag_news/README.md | 1 + datasets/ai2_arc/README.md | 3 +++ datasets/air_dialogue/README.md | 3 +++ datasets/ajgt_twitter_ar/README.md | 7 +------ datasets/allegro_reviews/README.md | 1 + 10 files changed, 22 insertions(+), 7 deletions(-) diff --git a/datasets/acronym_identification/README.md b/datasets/acronym_identification/README.md index c29f53d815b..b01b4ae4c8e 100644 --- a/datasets/acronym_identification/README.md +++ b/datasets/acronym_identification/README.md @@ -18,6 +18,7 @@ task_categories: task_ids: - structure-prediction-other-acronym-identification paperswithcode_id: acronym-identification +pretty_name: Acronym Identification Dataset --- # Dataset Card for Acronym Identification Dataset diff --git a/datasets/ade_corpus_v2/README.md b/datasets/ade_corpus_v2/README.md index 83bef3f5be4..e817e3564dd 100644 --- a/datasets/ade_corpus_v2/README.md +++ b/datasets/ade_corpus_v2/README.md @@ -33,6 +33,10 @@ task_ids: Ade_corpus_v2_drug_dosage_relation: - coreference-resolution paperswithcode_id: null +pretty_name: + Ade_corpus_v2_classification: Adverse Drug Reaction Data v2 (Ade_corpus_v2_classification) + Ade_corpus_v2_drug_ade_relation: Adverse Drug Reaction Data v2 (Ade_corpus_v2_drug_ade_relation) + Ade_corpus_v2_drug_dosage_relation: Adverse Drug Reaction Data v2 (Ade_corpus_v2_drug_dosage_relation) --- # Dataset Card for Adverse Drug Reaction Data v2 diff --git a/datasets/adversarial_qa/README.md b/datasets/adversarial_qa/README.md index 436fb5939a3..9148ad07f6f 100644 --- a/datasets/adversarial_qa/README.md +++ b/datasets/adversarial_qa/README.md @@ -19,6 +19,11 @@ task_ids: - extractive-qa - open-domain-qa paperswithcode_id: adversarialqa +pretty_name: + adversarialQA: adversarialQA (adversarialQA) + dbert: adversarialQA (dbert) + dbidaf: adversarialQA (dbidaf) + droberta: adversarialQA (droberta) --- # Dataset Card for adversarialQA diff --git a/datasets/aeslc/README.md b/datasets/aeslc/README.md index e467e233c98..f3c0d0ebd99 100644 --- a/datasets/aeslc/README.md +++ b/datasets/aeslc/README.md @@ -2,6 +2,7 @@ languages: - en paperswithcode_id: aeslc +pretty_name: '"aeslc"' --- # Dataset Card for "aeslc" @@ -162,4 +163,4 @@ The data fields are the same among all splits. ### Contributions -Thanks to [@patrickvonplaten](https://github.com/patrickvonplaten), [@thomwolf](https://github.com/thomwolf), [@lewtun](https://github.com/lewtun) for adding this dataset. \ No newline at end of file +Thanks to [@patrickvonplaten](https://github.com/patrickvonplaten), [@thomwolf](https://github.com/thomwolf), [@lewtun](https://github.com/lewtun) for adding this dataset. diff --git a/datasets/afrikaans_ner_corpus/README.md b/datasets/afrikaans_ner_corpus/README.md index 91febb0b124..72e96d8db09 100644 --- a/datasets/afrikaans_ner_corpus/README.md +++ b/datasets/afrikaans_ner_corpus/README.md @@ -18,6 +18,7 @@ task_categories: task_ids: - named-entity-recognition paperswithcode_id: null +pretty_name: Afrikaans Ner Corpus --- # Dataset Card for Afrikaans Ner Corpus diff --git a/datasets/ag_news/README.md b/datasets/ag_news/README.md index e1ac05a2421..1ea24fd3495 100644 --- a/datasets/ag_news/README.md +++ b/datasets/ag_news/README.md @@ -18,6 +18,7 @@ task_categories: task_ids: - topic-classification paperswithcode_id: ag-news +pretty_name: '"ag_news"' --- # Dataset Card for "ag_news" diff --git a/datasets/ai2_arc/README.md b/datasets/ai2_arc/README.md index 8951b547bc5..b09ed9b5e6a 100644 --- a/datasets/ai2_arc/README.md +++ b/datasets/ai2_arc/README.md @@ -19,6 +19,9 @@ task_ids: - open-domain-qa - multiple-choice-qa paperswithcode_id: null +pretty_name: + ARC-Challenge: '"ai2_arc" (ARC-Challenge)' + ARC-Easy: '"ai2_arc" (ARC-Easy)' --- # Dataset Card for "ai2_arc" diff --git a/datasets/air_dialogue/README.md b/datasets/air_dialogue/README.md index f589876ce57..49849234780 100644 --- a/datasets/air_dialogue/README.md +++ b/datasets/air_dialogue/README.md @@ -21,6 +21,9 @@ task_ids: - dialogue-modeling - language-modeling paperswithcode_id: null +pretty_name: + air_dialogue_data: air_dialogue (air_dialogue_data) + air_dialogue_kb: air_dialogue (air_dialogue_kb) --- # Dataset Card for air_dialogue diff --git a/datasets/ajgt_twitter_ar/README.md b/datasets/ajgt_twitter_ar/README.md index d67f350f765..aaa0fab579e 100644 --- a/datasets/ajgt_twitter_ar/README.md +++ b/datasets/ajgt_twitter_ar/README.md @@ -17,13 +17,8 @@ task_categories: - text-classification task_ids: - sentiment-classification -<<<<<<< HEAD -pretty_name: - plain_text: - - MetRec -======= paperswithcode_id: null ->>>>>>> origin/master +pretty_name: MetRec --- # Dataset Card for MetRec diff --git a/datasets/allegro_reviews/README.md b/datasets/allegro_reviews/README.md index 7cc24963b23..e13168a1a4c 100644 --- a/datasets/allegro_reviews/README.md +++ b/datasets/allegro_reviews/README.md @@ -18,6 +18,7 @@ task_categories: task_ids: - sentiment-scoring paperswithcode_id: allegro-reviews +pretty_name: Allegro Reviews --- # Dataset Card for Allegro Reviews From 51228e8b15f74e92ec24b53f256d6ee7216b3ecf Mon Sep 17 00:00:00 2001 From: Bhavitvya Malik Date: Wed, 23 Jun 2021 21:19:25 +0530 Subject: [PATCH 5/6] quentin's suggestions (backup) --- datasets/ade_corpus_v2/README.md | 6 +++--- datasets/adversarial_qa/README.md | 8 ++++---- datasets/aeslc/README.md | 2 +- datasets/afrikaans_ner_corpus/README.md | 2 +- datasets/ag_news/README.md | 2 +- datasets/ai2_arc/README.md | 4 ++-- datasets/air_dialogue/README.md | 4 ++-- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/datasets/ade_corpus_v2/README.md b/datasets/ade_corpus_v2/README.md index e817e3564dd..951342d53e6 100644 --- a/datasets/ade_corpus_v2/README.md +++ b/datasets/ade_corpus_v2/README.md @@ -34,9 +34,9 @@ task_ids: - coreference-resolution paperswithcode_id: null pretty_name: - Ade_corpus_v2_classification: Adverse Drug Reaction Data v2 (Ade_corpus_v2_classification) - Ade_corpus_v2_drug_ade_relation: Adverse Drug Reaction Data v2 (Ade_corpus_v2_drug_ade_relation) - Ade_corpus_v2_drug_dosage_relation: Adverse Drug Reaction Data v2 (Ade_corpus_v2_drug_dosage_relation) + Ade_corpus_v2_classification: Adverse Drug Reaction Data v2 (Classification) + Ade_corpus_v2_drug_ade_relation: Adverse Drug Reaction Data v2 (AE relations) + Ade_corpus_v2_drug_dosage_relation: Adverse Drug Reaction Data v2 (Dosages relations) --- # Dataset Card for Adverse Drug Reaction Data v2 diff --git a/datasets/adversarial_qa/README.md b/datasets/adversarial_qa/README.md index 9148ad07f6f..1f12ede52cd 100644 --- a/datasets/adversarial_qa/README.md +++ b/datasets/adversarial_qa/README.md @@ -20,10 +20,10 @@ task_ids: - open-domain-qa paperswithcode_id: adversarialqa pretty_name: - adversarialQA: adversarialQA (adversarialQA) - dbert: adversarialQA (dbert) - dbidaf: adversarialQA (dbidaf) - droberta: adversarialQA (droberta) + adversarialQA: AdversarialQA (combined) + dbert: AdversarialQA (dbert) + dbidaf: AdversarialQA (dbidaf) + droberta: AdversarialQA (droberta) --- # Dataset Card for adversarialQA diff --git a/datasets/aeslc/README.md b/datasets/aeslc/README.md index f3c0d0ebd99..d081e7740c6 100644 --- a/datasets/aeslc/README.md +++ b/datasets/aeslc/README.md @@ -2,7 +2,7 @@ languages: - en paperswithcode_id: aeslc -pretty_name: '"aeslc"' +pretty_name: Annotated Enron Subject Line Corpus (AESLC) --- # Dataset Card for "aeslc" diff --git a/datasets/afrikaans_ner_corpus/README.md b/datasets/afrikaans_ner_corpus/README.md index 72e96d8db09..ce1c6261539 100644 --- a/datasets/afrikaans_ner_corpus/README.md +++ b/datasets/afrikaans_ner_corpus/README.md @@ -18,7 +18,7 @@ task_categories: task_ids: - named-entity-recognition paperswithcode_id: null -pretty_name: Afrikaans Ner Corpus +pretty_name: Afrikaans NER Corpus --- # Dataset Card for Afrikaans Ner Corpus diff --git a/datasets/ag_news/README.md b/datasets/ag_news/README.md index 1ea24fd3495..a49448df33f 100644 --- a/datasets/ag_news/README.md +++ b/datasets/ag_news/README.md @@ -18,7 +18,7 @@ task_categories: task_ids: - topic-classification paperswithcode_id: ag-news -pretty_name: '"ag_news"' +pretty_name: AG news --- # Dataset Card for "ag_news" diff --git a/datasets/ai2_arc/README.md b/datasets/ai2_arc/README.md index b09ed9b5e6a..dc573eb3b98 100644 --- a/datasets/ai2_arc/README.md +++ b/datasets/ai2_arc/README.md @@ -20,8 +20,8 @@ task_ids: - multiple-choice-qa paperswithcode_id: null pretty_name: - ARC-Challenge: '"ai2_arc" (ARC-Challenge)' - ARC-Easy: '"ai2_arc" (ARC-Easy)' + ARC-Challenge: AI2 Reasoning Challenge (ARC) 2018 (ARC-Challenge) + ARC-Easy: AI2 Reasoning Challenge (ARC) 2018 (ARC-Easy) --- # Dataset Card for "ai2_arc" diff --git a/datasets/air_dialogue/README.md b/datasets/air_dialogue/README.md index 49849234780..31deee6265d 100644 --- a/datasets/air_dialogue/README.md +++ b/datasets/air_dialogue/README.md @@ -22,8 +22,8 @@ task_ids: - language-modeling paperswithcode_id: null pretty_name: - air_dialogue_data: air_dialogue (air_dialogue_data) - air_dialogue_kb: air_dialogue (air_dialogue_kb) + air_dialogue_data: AirDialogue (air_dialogue_data) + air_dialogue_kb: AirDialogue (air_dialogue_kb) --- # Dataset Card for air_dialogue From d9db92d203c9d2f7ab65b5608dd949e936a4109f Mon Sep 17 00:00:00 2001 From: Bhavitvya Malik Date: Wed, 23 Jun 2021 21:39:55 +0530 Subject: [PATCH 6/6] pretty names for 1-100 --- datasets/allocine/README.md | 29 ++-- datasets/alt/README.md | 8 + datasets/amazon_polarity/README.md | 3 +- datasets/amazon_reviews_multi/README.md | 10 +- datasets/amazon_us_reviews/README.md | 49 +++++- datasets/ambig_qa/README.md | 7 +- datasets/amttl/README.md | 3 +- datasets/anli/README.md | 3 +- datasets/app_reviews/README.md | 3 +- datasets/aqua_rat/README.md | 3 + datasets/aquamuse/README.md | 5 +- datasets/ar_cov19/README.md | 3 +- datasets/ar_res_reviews/README.md | 3 +- datasets/ar_sarcasm/README.md | 1 + datasets/arabic_billion_words/README.md | 13 +- datasets/arabic_pos_dialect/README.md | 7 +- datasets/arabic_speech_corpus/README.md | 1 + datasets/arcd/README.md | 3 +- datasets/arsentd_lev/README.md | 3 +- datasets/art/README.md | 3 +- datasets/arxiv_dataset/README.md | 3 +- datasets/ascent_kb/README.md | 3 + datasets/aslg_pc12/README.md | 3 +- datasets/asnq/README.md | 3 +- datasets/asset/README.md | 5 +- datasets/assin/README.md | 6 +- datasets/assin2/README.md | 3 +- datasets/atomic/README.md | 4 +- datasets/autshumato/README.md | 9 +- datasets/babi_qa/README.md | 161 ++++++++++++++++++ datasets/banking77/README.md | 1 + datasets/bbaw_egyptian/README.md | 1 + datasets/bbc_hindi_nli/README.md | 1 + datasets/bc2gm_corpus/README.md | 3 +- datasets/best2009/README.md | 3 +- datasets/bianet/README.md | 6 +- datasets/bible_para/README.md | 9 +- datasets/big_patent/README.md | 13 +- datasets/billsum/README.md | 3 +- datasets/bing_coronavirus_query_set/README.md | 3 +- 40 files changed, 358 insertions(+), 45 deletions(-) diff --git a/datasets/allocine/README.md b/datasets/allocine/README.md index 149e9686492..29984ddd212 100644 --- a/datasets/allocine/README.md +++ b/datasets/allocine/README.md @@ -18,6 +18,7 @@ task_categories: task_ids: - sentiment-classification paperswithcode_id: allocine +pretty_name: "Allociné" --- # Dataset Card for Allociné @@ -48,7 +49,7 @@ paperswithcode_id: allocine ## Dataset Description -- **Homepage:** +- **Homepage:** - **Repository:** [Allociné dataset repository](https://github.com/TheophileBlard/french-sentiment-analysis-with-bert/tree/master/allocine_dataset) - **Paper:** - **Leaderboard:** @@ -56,11 +57,11 @@ paperswithcode_id: allocine ### Dataset Summary -The Allociné dataset is a French-language dataset for sentiment analysis. The texts are movie reviews written between 2006 and 2020 by members of the [Allociné.fr](https://www.allocine.fr/) community for various films. It contains 100k positive and 100k negative reviews divided into train (160k), validation (20k), and test (20k). +The Allociné dataset is a French-language dataset for sentiment analysis. The texts are movie reviews written between 2006 and 2020 by members of the [Allociné.fr](https://www.allocine.fr/) community for various films. It contains 100k positive and 100k negative reviews divided into train (160k), validation (20k), and test (20k). ### Supported Tasks and Leaderboards -- `text-classification`, `sentiment-classification`: The dataset can be used to train a model for sentiment classification. The model performance is evaluated based on the accuracy of the predicted labels as compared to the given labels in the dataset. A BERT-based model, [tf-allociné](https://huggingface.co/tblard/tf-allocine), achieves 97.44% accuracy on the test set. +- `text-classification`, `sentiment-classification`: The dataset can be used to train a model for sentiment classification. The model performance is evaluated based on the accuracy of the predicted labels as compared to the given labels in the dataset. A BERT-based model, [tf-allociné](https://huggingface.co/tblard/tf-allocine), achieves 97.44% accuracy on the test set. ### Languages @@ -86,7 +87,7 @@ An example from the Allociné train set looks like the following: ### Data Splits -The Allociné dataset has 3 splits: _train_, _validation_, and _test_. The splits contain disjoint sets of movies. The following table contains the number of reviews in each split and the percentage of positive and negative reviews. +The Allociné dataset has 3 splits: _train_, _validation_, and _test_. The splits contain disjoint sets of movies. The following table contains the number of reviews in each split and the percentage of positive and negative reviews. | Dataset Split | Number of Instances in Split | Percent Negative Reviews | Percent Positive Reviews | | ------------- | ---------------------------- | ------------------------ | ------------------------ | @@ -98,23 +99,23 @@ The Allociné dataset has 3 splits: _train_, _validation_, and _test_. The split ### Curation Rationale -The Allociné dataset was developed to support large-scale sentiment analysis in French. It was released alongside the [tf-allociné](https://huggingface.co/tblard/tf-allocine) model and used to compare the performance of several language models on this task. +The Allociné dataset was developed to support large-scale sentiment analysis in French. It was released alongside the [tf-allociné](https://huggingface.co/tblard/tf-allocine) model and used to compare the performance of several language models on this task. ### Source Data #### Initial Data Collection and Normalization -The reviews and ratings were collected using a list of [film page urls](https://github.com/TheophileBlard/french-sentiment-analysis-with-bert/blob/master/allocine_dataset/allocine_films_urls.txt) and the [allocine_scraper.py](https://github.com/TheophileBlard/french-sentiment-analysis-with-bert/blob/master/allocine_dataset/allocine_scraper.py) tool. Up to 30 reviews were collected for each film. +The reviews and ratings were collected using a list of [film page urls](https://github.com/TheophileBlard/french-sentiment-analysis-with-bert/blob/master/allocine_dataset/allocine_films_urls.txt) and the [allocine_scraper.py](https://github.com/TheophileBlard/french-sentiment-analysis-with-bert/blob/master/allocine_dataset/allocine_scraper.py) tool. Up to 30 reviews were collected for each film. -The reviews were originally labeled with a rating from 0.5 to 5.0 with a step of 0.5 between each rating. Ratings less than or equal to 2 are labeled as negative and ratings greater than or equal to 4 are labeled as positive. Only reviews with less than 2000 characters are included in the dataset. +The reviews were originally labeled with a rating from 0.5 to 5.0 with a step of 0.5 between each rating. Ratings less than or equal to 2 are labeled as negative and ratings greater than or equal to 4 are labeled as positive. Only reviews with less than 2000 characters are included in the dataset. #### Who are the source language producers? -The dataset contains movie reviews produced by the online community of the [Allociné.fr](https://www.allocine.fr/) website. +The dataset contains movie reviews produced by the online community of the [Allociné.fr](https://www.allocine.fr/) website. ### Annotations -The dataset does not contain any additional annotations. +The dataset does not contain any additional annotations. #### Annotation process @@ -132,23 +133,23 @@ Reviewer usernames or personal information were not collected with the reviews, ### Social Impact of Dataset -Sentiment classification is a complex task which requires sophisticated language understanding skills. Successful models can support decision-making based on the outcome of the sentiment analysis, though such models currently require a high degree of domain specificity. +Sentiment classification is a complex task which requires sophisticated language understanding skills. Successful models can support decision-making based on the outcome of the sentiment analysis, though such models currently require a high degree of domain specificity. -It should be noted that the community represented in the dataset may not represent any downstream application's potential users, and the observed behavior of a model trained on this dataset may vary based on the domain and use case. +It should be noted that the community represented in the dataset may not represent any downstream application's potential users, and the observed behavior of a model trained on this dataset may vary based on the domain and use case. ### Discussion of Biases -The Allociné website lists a number of topics which violate their [terms of service](https://www.allocine.fr/service/conditions.html#charte). Further analysis is needed to determine the extent to which moderators have successfully removed such content. +The Allociné website lists a number of topics which violate their [terms of service](https://www.allocine.fr/service/conditions.html#charte). Further analysis is needed to determine the extent to which moderators have successfully removed such content. ### Other Known Limitations -The limitations of the Allociné dataset have not yet been investigated, however [Staliūnaitė and Bonfil (2017)](https://www.aclweb.org/anthology/W17-5410.pdf) detail linguistic phenomena that are generally present in sentiment analysis but difficult for models to accurately label, such as negation, adverbial modifiers, and reviewer pragmatics. +The limitations of the Allociné dataset have not yet been investigated, however [Staliūnaitė and Bonfil (2017)](https://www.aclweb.org/anthology/W17-5410.pdf) detail linguistic phenomena that are generally present in sentiment analysis but difficult for models to accurately label, such as negation, adverbial modifiers, and reviewer pragmatics. ## Additional Information ### Dataset Curators -The Allociné dataset was collected by Théophile Blard. +The Allociné dataset was collected by Théophile Blard. ### Licensing Information diff --git a/datasets/alt/README.md b/datasets/alt/README.md index d921c8837c5..05b0dc62df7 100644 --- a/datasets/alt/README.md +++ b/datasets/alt/README.md @@ -46,6 +46,14 @@ task_ids: - machine-translation - parsing paperswithcode_id: alt +pretty_name: + alt-en: Asian Language Treebank (alt-en) + alt-jp: Asian Language Treebank (alt-jp) + alt-km: Asian Language Treebank (alt-km) + alt-my: Asian Language Treebank (alt-my) + alt-my-transliteration: Asian Language Treebank (alt-my-transliteration) + alt-my-west-transliteration: Asian Language Treebank (alt-my-west-transliteration) + alt-parallel: Asian Language Treebank (alt-parallel) --- # Dataset Card for Asian Language Treebank (ALT) diff --git a/datasets/amazon_polarity/README.md b/datasets/amazon_polarity/README.md index 9aac43e5a0c..5ad0b4ba272 100644 --- a/datasets/amazon_polarity/README.md +++ b/datasets/amazon_polarity/README.md @@ -18,6 +18,7 @@ task_categories: task_ids: - sentiment-classification paperswithcode_id: null +pretty_name: AmazonPolarity --- # Dataset Card for amazon_polarity @@ -156,4 +157,4 @@ Xiang Zhang, Junbo Zhao, Yann LeCun. Character-level Convolutional Networks for ### Contributions -Thanks to [@hfawaz](https://github.com/hfawaz) for adding this dataset. \ No newline at end of file +Thanks to [@hfawaz](https://github.com/hfawaz) for adding this dataset. diff --git a/datasets/amazon_reviews_multi/README.md b/datasets/amazon_reviews_multi/README.md index 650831d9899..4e6d68d136c 100644 --- a/datasets/amazon_reviews_multi/README.md +++ b/datasets/amazon_reviews_multi/README.md @@ -69,6 +69,14 @@ task_ids: - summarization - topic-classification paperswithcode_id: null +pretty_name: + all_languages: The Multilingual Amazon Reviews Corpus (all_languages) + de: The Multilingual Amazon Reviews Corpus (de) + en: The Multilingual Amazon Reviews Corpus (en) + es: The Multilingual Amazon Reviews Corpus (es) + fr: The Multilingual Amazon Reviews Corpus (fr) + ja: The Multilingual Amazon Reviews Corpus (ja) + zh: The Multilingual Amazon Reviews Corpus (zh) --- # Dataset Card for The Multilingual Amazon Reviews Corpus @@ -239,4 +247,4 @@ Phillip Keung, Yichao Lu, György Szarvas and Noah A. Smith. “The Multilingual ### Contributions -Thanks to [@joeddav](https://github.com/joeddav) for adding this dataset. \ No newline at end of file +Thanks to [@joeddav](https://github.com/joeddav) for adding this dataset. diff --git a/datasets/amazon_us_reviews/README.md b/datasets/amazon_us_reviews/README.md index d9ab8a859e9..b775dd27c96 100644 --- a/datasets/amazon_us_reviews/README.md +++ b/datasets/amazon_us_reviews/README.md @@ -2,6 +2,53 @@ languages: - en paperswithcode_id: null +pretty_name: + Apparel_v1_00: AmazonUsReviews (Apparel_v1_00) + Automotive_v1_00: AmazonUsReviews (Automotive_v1_00) + Baby_v1_00: AmazonUsReviews (Baby_v1_00) + Beauty_v1_00: AmazonUsReviews (Beauty_v1_00) + Books_v1_00: AmazonUsReviews (Books_v1_00) + Books_v1_01: AmazonUsReviews (Books_v1_01) + Books_v1_02: AmazonUsReviews (Books_v1_02) + Camera_v1_00: AmazonUsReviews (Camera_v1_00) + Digital_Ebook_Purchase_v1_00: AmazonUsReviews (Digital_Ebook_Purchase_v1_00) + Digital_Ebook_Purchase_v1_01: AmazonUsReviews (Digital_Ebook_Purchase_v1_01) + Digital_Music_Purchase_v1_00: AmazonUsReviews (Digital_Music_Purchase_v1_00) + Digital_Software_v1_00: AmazonUsReviews (Digital_Software_v1_00) + Digital_Video_Download_v1_00: AmazonUsReviews (Digital_Video_Download_v1_00) + Digital_Video_Games_v1_00: AmazonUsReviews (Digital_Video_Games_v1_00) + Electronics_v1_00: AmazonUsReviews (Electronics_v1_00) + Furniture_v1_00: AmazonUsReviews (Furniture_v1_00) + Gift_Card_v1_00: AmazonUsReviews (Gift_Card_v1_00) + Grocery_v1_00: AmazonUsReviews (Grocery_v1_00) + Health_Personal_Care_v1_00: AmazonUsReviews (Health_Personal_Care_v1_00) + Home_Entertainment_v1_00: AmazonUsReviews (Home_Entertainment_v1_00) + Home_Improvement_v1_00: AmazonUsReviews (Home_Improvement_v1_00) + Home_v1_00: AmazonUsReviews (Home_v1_00) + Jewelry_v1_00: AmazonUsReviews (Jewelry_v1_00) + Kitchen_v1_00: AmazonUsReviews (Kitchen_v1_00) + Lawn_and_Garden_v1_00: AmazonUsReviews (Lawn_and_Garden_v1_00) + Luggage_v1_00: AmazonUsReviews (Luggage_v1_00) + Major_Appliances_v1_00: AmazonUsReviews (Major_Appliances_v1_00) + Mobile_Apps_v1_00: AmazonUsReviews (Mobile_Apps_v1_00) + Mobile_Electronics_v1_00: AmazonUsReviews (Mobile_Electronics_v1_00) + Music_v1_00: AmazonUsReviews (Music_v1_00) + Musical_Instruments_v1_00: AmazonUsReviews (Musical_Instruments_v1_00) + Office_Products_v1_00: AmazonUsReviews (Office_Products_v1_00) + Outdoors_v1_00: AmazonUsReviews (Outdoors_v1_00) + PC_v1_00: AmazonUsReviews (PC_v1_00) + Personal_Care_Appliances_v1_00: AmazonUsReviews (Personal_Care_Appliances_v1_00) + Pet_Products_v1_00: AmazonUsReviews (Pet_Products_v1_00) + Shoes_v1_00: AmazonUsReviews (Shoes_v1_00) + Software_v1_00: AmazonUsReviews (Software_v1_00) + Sports_v1_00: AmazonUsReviews (Sports_v1_00) + Tools_v1_00: AmazonUsReviews (Tools_v1_00) + Toys_v1_00: AmazonUsReviews (Toys_v1_00) + Video_DVD_v1_00: AmazonUsReviews (Video_DVD_v1_00) + Video_Games_v1_00: AmazonUsReviews (Video_Games_v1_00) + Video_v1_00: AmazonUsReviews (Video_v1_00) + Watches_v1_00: AmazonUsReviews (Watches_v1_00) + Wireless_v1_00: AmazonUsReviews (Wireless_v1_00) --- # Dataset Card for "amazon_us_reviews" @@ -378,4 +425,4 @@ The data fields are the same among all splits. ### Contributions -Thanks to [@joeddav](https://github.com/joeddav) for adding this dataset. \ No newline at end of file +Thanks to [@joeddav](https://github.com/joeddav) for adding this dataset. diff --git a/datasets/ambig_qa/README.md b/datasets/ambig_qa/README.md index 7669922234a..1ae82b1be77 100644 --- a/datasets/ambig_qa/README.md +++ b/datasets/ambig_qa/README.md @@ -19,6 +19,9 @@ task_categories: task_ids: - open-domain-qa paperswithcode_id: ambigqa +pretty_name: + full: 'AmbigQA: Answering Ambiguous Open-domain Questions (full)' + light: 'AmbigQA: Answering Ambiguous Open-domain Questions (light)' --- # Dataset Card for AmbigQA: Answering Ambiguous Open-domain Questions @@ -117,7 +120,7 @@ Full 'nq_answer': Sequence(feature=Value(dtype='string', id=None), length=-1, id=None), 'nq_doc_title': Value(dtype='string', id=None)} ``` -In the original data format `annotations` have different keys depending on the `type` field = `singleAnswer` or `multipleQAs`. But this implementation uses an empty list `[]` for the unavailable keys +In the original data format `annotations` have different keys depending on the `type` field = `singleAnswer` or `multipleQAs`. But this implementation uses an empty list `[]` for the unavailable keys please refer to Dataset Contents(https://github.com/shmsw25/AmbigQA#dataset-contents) for more details. @@ -220,4 +223,4 @@ Light version only has `id`, `question`, `annotations` fields ``` ### Contributions -Thanks to [@cceyda](https://github.com/cceyda) for adding this dataset. \ No newline at end of file +Thanks to [@cceyda](https://github.com/cceyda) for adding this dataset. diff --git a/datasets/amttl/README.md b/datasets/amttl/README.md index 36c7eaaa40d..8670c554b65 100644 --- a/datasets/amttl/README.md +++ b/datasets/amttl/README.md @@ -18,6 +18,7 @@ task_categories: task_ids: - parsing paperswithcode_id: null +pretty_name: AMTTL --- # Dataset Card for AMTTL @@ -146,4 +147,4 @@ paperswithcode_id: null ``` ### Contributions -Thanks to [@JetRunner](https://github.com/JetRunner) for adding this dataset. \ No newline at end of file +Thanks to [@JetRunner](https://github.com/JetRunner) for adding this dataset. diff --git a/datasets/anli/README.md b/datasets/anli/README.md index 56bc6330355..50c1d667e1c 100644 --- a/datasets/anli/README.md +++ b/datasets/anli/README.md @@ -2,6 +2,7 @@ languages: - en paperswithcode_id: anli +pretty_name: Adversarial NLI --- # Dataset Card for "anli" @@ -172,4 +173,4 @@ The data fields are the same among all splits. ### Contributions -Thanks to [@thomwolf](https://github.com/thomwolf), [@easonnie](https://github.com/easonnie), [@lhoestq](https://github.com/lhoestq), [@patrickvonplaten](https://github.com/patrickvonplaten) for adding this dataset. \ No newline at end of file +Thanks to [@thomwolf](https://github.com/thomwolf), [@easonnie](https://github.com/easonnie), [@lhoestq](https://github.com/lhoestq), [@patrickvonplaten](https://github.com/patrickvonplaten) for adding this dataset. diff --git a/datasets/app_reviews/README.md b/datasets/app_reviews/README.md index 50ef2eb81ad..1a932dad5b1 100644 --- a/datasets/app_reviews/README.md +++ b/datasets/app_reviews/README.md @@ -18,6 +18,7 @@ task_categories: task_ids: - sentiment-scoring paperswithcode_id: null +pretty_name: AppReviews --- # Dataset Card for [Dataset Name] @@ -155,4 +156,4 @@ year={2017} ### Contributions -Thanks to [@darshan-gandhi](https://github.com/darshan-gandhi) for adding this dataset. \ No newline at end of file +Thanks to [@darshan-gandhi](https://github.com/darshan-gandhi) for adding this dataset. diff --git a/datasets/aqua_rat/README.md b/datasets/aqua_rat/README.md index 11d1a885190..f0d6e222aab 100644 --- a/datasets/aqua_rat/README.md +++ b/datasets/aqua_rat/README.md @@ -19,6 +19,9 @@ task_categories: task_ids: - multiple-choice-qa paperswithcode_id: aqua-rat +pretty_name: + raw: Algebra Question Answering with Rationales (raw) + tokenized: Algebra Question Answering with Rationales (tokenized) --- # Dataset Card for AQUA-RAT diff --git a/datasets/aquamuse/README.md b/datasets/aquamuse/README.md index 90ff92c1680..24eac6b65e8 100644 --- a/datasets/aquamuse/README.md +++ b/datasets/aquamuse/README.md @@ -25,6 +25,9 @@ task_ids: - extractive-qa - other-other-query-based-multi-document-summarization paperswithcode_id: aquamuse +pretty_name: + abstractive: AQuaMuSe (abstractive) + extractive: AQuaMuSe (extractive) --- # Dataset Card for AQuaMuSe @@ -177,4 +180,4 @@ The dataset curator is [sayalikulkarni](https://github.com/google-research-datas ### Contributions -Thanks to [@Karthik-Bhaskar](https://github.com/Karthik-Bhaskar) for adding this dataset. \ No newline at end of file +Thanks to [@Karthik-Bhaskar](https://github.com/Karthik-Bhaskar) for adding this dataset. diff --git a/datasets/ar_cov19/README.md b/datasets/ar_cov19/README.md index a4cf7577ee8..6877143b1af 100644 --- a/datasets/ar_cov19/README.md +++ b/datasets/ar_cov19/README.md @@ -16,6 +16,7 @@ task_categories: task_ids: - other-other-data-mining paperswithcode_id: arcov-19 +pretty_name: ArCOV19 --- # Dataset Card for ArCOV19 @@ -151,4 +152,4 @@ No annotation was provided with the dataset. ### Contributions -Thanks to [@Fatima-Haouari](https://github.com/Fatima-Haouari) for adding this dataset. \ No newline at end of file +Thanks to [@Fatima-Haouari](https://github.com/Fatima-Haouari) for adding this dataset. diff --git a/datasets/ar_res_reviews/README.md b/datasets/ar_res_reviews/README.md index e6e4cd05568..0437be0b4ce 100644 --- a/datasets/ar_res_reviews/README.md +++ b/datasets/ar_res_reviews/README.md @@ -18,6 +18,7 @@ task_categories: task_ids: - sentiment-classification paperswithcode_id: null +pretty_name: ArRestReviews --- # Dataset Card for ArRestReviews @@ -174,4 +175,4 @@ isbn="978-3-319-18117-2" ### Contributions -Thanks to [@abdulelahsm](https://github.com/abdulelahsm) for adding this dataset. \ No newline at end of file +Thanks to [@abdulelahsm](https://github.com/abdulelahsm) for adding this dataset. diff --git a/datasets/ar_sarcasm/README.md b/datasets/ar_sarcasm/README.md index c554e519909..7c60ea2bebc 100644 --- a/datasets/ar_sarcasm/README.md +++ b/datasets/ar_sarcasm/README.md @@ -20,6 +20,7 @@ task_ids: - sentiment-classification - text-classification-other-sarcasm-detection paperswithcode_id: null +pretty_name: ArSarcasm --- # Dataset Card for ArSarcasm diff --git a/datasets/arabic_billion_words/README.md b/datasets/arabic_billion_words/README.md index e78f77b9a14..92559b0c4ee 100644 --- a/datasets/arabic_billion_words/README.md +++ b/datasets/arabic_billion_words/README.md @@ -37,6 +37,17 @@ task_categories: task_ids: - language-modeling paperswithcode_id: null +pretty_name: + Alittihad: Arabic Billion Words Corpus (Alittihad) + Almasryalyoum: Arabic Billion Words Corpus (Almasryalyoum) + Almustaqbal: Arabic Billion Words Corpus (Almustaqbal) + Alqabas: Arabic Billion Words Corpus (Alqabas) + Echoroukonline: Arabic Billion Words Corpus (Echoroukonline) + Ryiadh: Arabic Billion Words Corpus (Ryiadh) + Sabanews: Arabic Billion Words Corpus (Sabanews) + SaudiYoum: Arabic Billion Words Corpus (SaudiYoum) + Techreen: Arabic Billion Words Corpus (Techreen) + Youm7: Arabic Billion Words Corpus (Youm7) --- # Dataset Card for Arabic Billion Words Corpus @@ -173,4 +184,4 @@ The data fields are: ### Contributions -Thanks to [@zaidalyafeai](https://github.com/zaidalyafeai) for adding this dataset. \ No newline at end of file +Thanks to [@zaidalyafeai](https://github.com/zaidalyafeai) for adding this dataset. diff --git a/datasets/arabic_pos_dialect/README.md b/datasets/arabic_pos_dialect/README.md index e7a7dbaefb5..aecb10b1bb0 100644 --- a/datasets/arabic_pos_dialect/README.md +++ b/datasets/arabic_pos_dialect/README.md @@ -18,6 +18,11 @@ task_categories: task_ids: - part-of-speech-tagging paperswithcode_id: null +pretty_name: + egy: Arabic POS Dialect (egy) + glf: Arabic POS Dialect (glf) + lev: Arabic POS Dialect (lev) + mgr: Arabic POS Dialect (mgr) --- # Dataset Card for Arabic POS Dialect @@ -210,4 +215,4 @@ Kareem Darwish, Hamdy Mubarak, Ahmed Abdelali, Mohamed Eldesouki, Younes Samih, ### Contributions -Thanks to [@mcmillanmajora](https://github.com/mcmillanmajora) for adding this dataset. \ No newline at end of file +Thanks to [@mcmillanmajora](https://github.com/mcmillanmajora) for adding this dataset. diff --git a/datasets/arabic_speech_corpus/README.md b/datasets/arabic_speech_corpus/README.md index a67a07b2a95..35f068da92f 100644 --- a/datasets/arabic_speech_corpus/README.md +++ b/datasets/arabic_speech_corpus/README.md @@ -18,6 +18,7 @@ task_categories: task_ids: - other-other-automatic speech recognition paperswithcode_id: arabic-speech-corpus +pretty_name: Arabic Speech Corpus --- # Dataset Card for Arabic Speech Corpus diff --git a/datasets/arcd/README.md b/datasets/arcd/README.md index b3a9698d180..da8bea9f3c9 100644 --- a/datasets/arcd/README.md +++ b/datasets/arcd/README.md @@ -18,6 +18,7 @@ task_categories: task_ids: - extractive-qa paperswithcode_id: arcd +pretty_name: ARCD --- # Dataset Card for "arcd" @@ -191,4 +192,4 @@ The data fields are the same among all splits. ### Contributions -Thanks to [@albertvillanova](https://github.com/albertvillanova), [@lewtun](https://github.com/lewtun), [@mariamabarham](https://github.com/mariamabarham), [@thomwolf](https://github.com/thomwolf), [@tayciryahmed](https://github.com/tayciryahmed) for adding this dataset. \ No newline at end of file +Thanks to [@albertvillanova](https://github.com/albertvillanova), [@lewtun](https://github.com/lewtun), [@mariamabarham](https://github.com/mariamabarham), [@thomwolf](https://github.com/thomwolf), [@tayciryahmed](https://github.com/tayciryahmed) for adding this dataset. diff --git a/datasets/arsentd_lev/README.md b/datasets/arsentd_lev/README.md index 1bbcd067b12..5be2b1601a4 100644 --- a/datasets/arsentd_lev/README.md +++ b/datasets/arsentd_lev/README.md @@ -20,6 +20,7 @@ task_ids: - sentiment-classification - topic-classification paperswithcode_id: arsentd-lev +pretty_name: ArSenTD-LEV --- # Dataset Card for ArSenTD-LEV @@ -156,4 +157,4 @@ Make sure to read and agree to the [license](http://oma-project.com/ArSenL/ArSen ### Contributions -Thanks to [@moussaKam](https://github.com/moussaKam) for adding this dataset. \ No newline at end of file +Thanks to [@moussaKam](https://github.com/moussaKam) for adding this dataset. diff --git a/datasets/art/README.md b/datasets/art/README.md index 7afd644ea41..3e71a43e5aa 100644 --- a/datasets/art/README.md +++ b/datasets/art/README.md @@ -2,6 +2,7 @@ languages: - en paperswithcode_id: art-dataset +pretty_name: Abductive Reasoning in narrative Text --- # Dataset Card for "art" @@ -167,4 +168,4 @@ The data fields are the same among all splits. ### Contributions -Thanks to [@patrickvonplaten](https://github.com/patrickvonplaten), [@thomwolf](https://github.com/thomwolf), [@mariamabarham](https://github.com/mariamabarham), [@lewtun](https://github.com/lewtun), [@lhoestq](https://github.com/lhoestq) for adding this dataset. \ No newline at end of file +Thanks to [@patrickvonplaten](https://github.com/patrickvonplaten), [@thomwolf](https://github.com/thomwolf), [@mariamabarham](https://github.com/mariamabarham), [@lewtun](https://github.com/lewtun), [@lhoestq](https://github.com/lhoestq) for adding this dataset. diff --git a/datasets/arxiv_dataset/README.md b/datasets/arxiv_dataset/README.md index 43bb681dafa..fdd9b79b264 100644 --- a/datasets/arxiv_dataset/README.md +++ b/datasets/arxiv_dataset/README.md @@ -25,6 +25,7 @@ task_ids: - summarization - text-simplification paperswithcode_id: null +pretty_name: arXiv Dataset --- # Dataset Card For arXiv Dataset @@ -186,4 +187,4 @@ The data is under the [Creative Commons CC0 1.0 Universal Public Domain Dedicati ### Contributions -Thanks to [@tanmoyio](https://github.com/tanmoyio) for adding this dataset. \ No newline at end of file +Thanks to [@tanmoyio](https://github.com/tanmoyio) for adding this dataset. diff --git a/datasets/ascent_kb/README.md b/datasets/ascent_kb/README.md index 251736c3d5d..0f28f7414d4 100644 --- a/datasets/ascent_kb/README.md +++ b/datasets/ascent_kb/README.md @@ -18,6 +18,9 @@ task_categories: task_ids: - other-other-knowledge-base paperswithcode_id: ascentkb +pretty_name: + canonical: Ascent KB (canonical) + open: Ascent KB (open) --- # Dataset Card for Ascent KB diff --git a/datasets/aslg_pc12/README.md b/datasets/aslg_pc12/README.md index 9ad1201e539..690c3170a90 100644 --- a/datasets/aslg_pc12/README.md +++ b/datasets/aslg_pc12/README.md @@ -2,6 +2,7 @@ languages: - en paperswithcode_id: aslg-pc12 +pretty_name: English-ASL Gloss Parallel Corpus 2012 --- # Dataset Card for "aslg_pc12" @@ -155,4 +156,4 @@ The data fields are the same among all splits. ### Contributions -Thanks to [@AmitMY](https://github.com/AmitMY) for adding this dataset. \ No newline at end of file +Thanks to [@AmitMY](https://github.com/AmitMY) for adding this dataset. diff --git a/datasets/asnq/README.md b/datasets/asnq/README.md index 508b9e74126..bcad6df2094 100644 --- a/datasets/asnq/README.md +++ b/datasets/asnq/README.md @@ -2,6 +2,7 @@ languages: - en paperswithcode_id: asnq +pretty_name: Answer Sentence Natural Questions --- # Dataset Card for "asnq" @@ -174,4 +175,4 @@ The data fields are the same among all splits. ### Contributions -Thanks to [@mkserge](https://github.com/mkserge) for adding this dataset. \ No newline at end of file +Thanks to [@mkserge](https://github.com/mkserge) for adding this dataset. diff --git a/datasets/asset/README.md b/datasets/asset/README.md index c7d660fd214..616fcd7f42d 100644 --- a/datasets/asset/README.md +++ b/datasets/asset/README.md @@ -25,6 +25,9 @@ task_ids: simplification: - text-simplification paperswithcode_id: asset +pretty_name: + ratings: ASSET (ratings) + simplification: ASSET (simplification) --- # Dataset Card for ASSET @@ -204,4 +207,4 @@ This dataset card uses material written by [Juan Diego Rodriguez](https://github ### Contributions -Thanks to [@yjernite](https://github.com/yjernite) for adding this dataset. \ No newline at end of file +Thanks to [@yjernite](https://github.com/yjernite) for adding this dataset. diff --git a/datasets/assin/README.md b/datasets/assin/README.md index 32c58c0afd2..1ae3a2205d6 100644 --- a/datasets/assin/README.md +++ b/datasets/assin/README.md @@ -20,6 +20,10 @@ task_ids: - natural-language-inference - semantic-similarity-scoring paperswithcode_id: assin +pretty_name: + full: ASSIN (full) + ptbr: ASSIN (ptbr) + ptpt: ASSIN (ptpt) --- # Dataset Card for ASSIN @@ -191,4 +195,4 @@ The data is split into train, validation and test set. The split sizes are as fo ### Contributions -Thanks to [@jonatasgrosman](https://github.com/jonatasgrosman) for adding this dataset. \ No newline at end of file +Thanks to [@jonatasgrosman](https://github.com/jonatasgrosman) for adding this dataset. diff --git a/datasets/assin2/README.md b/datasets/assin2/README.md index 552bcca641d..6a4cbdc4c52 100644 --- a/datasets/assin2/README.md +++ b/datasets/assin2/README.md @@ -20,6 +20,7 @@ task_ids: - natural-language-inference - semantic-similarity-scoring paperswithcode_id: assin2 +pretty_name: ASSIN 2 --- # Dataset Card for ASSIN 2 @@ -172,4 +173,4 @@ The data is split into train, validation and test set. The split sizes are as fo ### Contributions -Thanks to [@jonatasgrosman](https://github.com/jonatasgrosman) for adding this dataset. \ No newline at end of file +Thanks to [@jonatasgrosman](https://github.com/jonatasgrosman) for adding this dataset. diff --git a/datasets/atomic/README.md b/datasets/atomic/README.md index 78b202f8de9..7e2cf55feeb 100755 --- a/datasets/atomic/README.md +++ b/datasets/atomic/README.md @@ -18,6 +18,8 @@ task_categories: task_ids: - other-stuctured-to-text paperswithcode_id: atomic +pretty_name: An Atlas of Machine Commonsense for If-Then Reasoning - Atomic Common + Sense Dataset --- # Dataset Card for An Atlas of Machine Commonsense for If-Then Reasoning - Atomic Common Sense Dataset @@ -180,4 +182,4 @@ The Creative Commons Attribution 4.0 International License. https://creativecomm ### Contributions -Thanks to [@ontocord](https://github.com/ontocord) for adding this dataset. \ No newline at end of file +Thanks to [@ontocord](https://github.com/ontocord) for adding this dataset. diff --git a/datasets/autshumato/README.md b/datasets/autshumato/README.md index ecb6ef5e9d5..d4634c58992 100644 --- a/datasets/autshumato/README.md +++ b/datasets/autshumato/README.md @@ -44,6 +44,13 @@ task_categories: task_ids: - machine-translation paperswithcode_id: null +pretty_name: + autshumato-en-tn: autshumato (autshumato-en-tn) + autshumato-en-ts: autshumato (autshumato-en-ts) + autshumato-en-ts-manual: autshumato (autshumato-en-ts-manual) + autshumato-en-zu: autshumato (autshumato-en-zu) + autshumato-tn: autshumato (autshumato-tn) + autshumato-ts: autshumato (autshumato-ts) --- # Dataset Card Creation Guide @@ -183,4 +190,4 @@ project is to develop machine translation systems for three South African langua ### Contributions -Thanks to [@Narsil](https://github.com/Narsil) for adding this dataset. \ No newline at end of file +Thanks to [@Narsil](https://github.com/Narsil) for adding this dataset. diff --git a/datasets/babi_qa/README.md b/datasets/babi_qa/README.md index 98562c1e6cd..61b0255c3d2 100644 --- a/datasets/babi_qa/README.md +++ b/datasets/babi_qa/README.md @@ -337,6 +337,167 @@ task_categories: task_ids: - question-answering-other-chained-qa paperswithcode_id: babi-1 +pretty_name: + en-10k-qa1: bAbI (en-10k-qa1) + en-10k-qa10: bAbI (en-10k-qa10) + en-10k-qa11: bAbI (en-10k-qa11) + en-10k-qa12: bAbI (en-10k-qa12) + en-10k-qa13: bAbI (en-10k-qa13) + en-10k-qa14: bAbI (en-10k-qa14) + en-10k-qa15: bAbI (en-10k-qa15) + en-10k-qa16: bAbI (en-10k-qa16) + en-10k-qa17: bAbI (en-10k-qa17) + en-10k-qa18: bAbI (en-10k-qa18) + en-10k-qa19: bAbI (en-10k-qa19) + en-10k-qa2: bAbI (en-10k-qa2) + en-10k-qa20: bAbI (en-10k-qa20) + en-10k-qa3: bAbI (en-10k-qa3) + en-10k-qa4: bAbI (en-10k-qa4) + en-10k-qa5: bAbI (en-10k-qa5) + en-10k-qa6: bAbI (en-10k-qa6) + en-10k-qa7: bAbI (en-10k-qa7) + en-10k-qa8: bAbI (en-10k-qa8) + en-10k-qa9: bAbI (en-10k-qa9) + en-qa1: bAbI (en-qa1) + en-qa10: bAbI (en-qa10) + en-qa11: bAbI (en-qa11) + en-qa12: bAbI (en-qa12) + en-qa13: bAbI (en-qa13) + en-qa14: bAbI (en-qa14) + en-qa15: bAbI (en-qa15) + en-qa16: bAbI (en-qa16) + en-qa17: bAbI (en-qa17) + en-qa18: bAbI (en-qa18) + en-qa19: bAbI (en-qa19) + en-qa2: bAbI (en-qa2) + en-qa20: bAbI (en-qa20) + en-qa3: bAbI (en-qa3) + en-qa4: bAbI (en-qa4) + en-qa5: bAbI (en-qa5) + en-qa6: bAbI (en-qa6) + en-qa7: bAbI (en-qa7) + en-qa8: bAbI (en-qa8) + en-qa9: bAbI (en-qa9) + en-valid-10k-qa1: bAbI (en-valid-10k-qa1) + en-valid-10k-qa10: bAbI (en-valid-10k-qa10) + en-valid-10k-qa11: bAbI (en-valid-10k-qa11) + en-valid-10k-qa12: bAbI (en-valid-10k-qa12) + en-valid-10k-qa13: bAbI (en-valid-10k-qa13) + en-valid-10k-qa14: bAbI (en-valid-10k-qa14) + en-valid-10k-qa15: bAbI (en-valid-10k-qa15) + en-valid-10k-qa16: bAbI (en-valid-10k-qa16) + en-valid-10k-qa17: bAbI (en-valid-10k-qa17) + en-valid-10k-qa18: bAbI (en-valid-10k-qa18) + en-valid-10k-qa19: bAbI (en-valid-10k-qa19) + en-valid-10k-qa2: bAbI (en-valid-10k-qa2) + en-valid-10k-qa20: bAbI (en-valid-10k-qa20) + en-valid-10k-qa3: bAbI (en-valid-10k-qa3) + en-valid-10k-qa4: bAbI (en-valid-10k-qa4) + en-valid-10k-qa5: bAbI (en-valid-10k-qa5) + en-valid-10k-qa6: bAbI (en-valid-10k-qa6) + en-valid-10k-qa7: bAbI (en-valid-10k-qa7) + en-valid-10k-qa8: bAbI (en-valid-10k-qa8) + en-valid-10k-qa9: bAbI (en-valid-10k-qa9) + en-valid-qa1: bAbI (en-valid-qa1) + en-valid-qa10: bAbI (en-valid-qa10) + en-valid-qa11: bAbI (en-valid-qa11) + en-valid-qa12: bAbI (en-valid-qa12) + en-valid-qa13: bAbI (en-valid-qa13) + en-valid-qa14: bAbI (en-valid-qa14) + en-valid-qa15: bAbI (en-valid-qa15) + en-valid-qa16: bAbI (en-valid-qa16) + en-valid-qa17: bAbI (en-valid-qa17) + en-valid-qa18: bAbI (en-valid-qa18) + en-valid-qa19: bAbI (en-valid-qa19) + en-valid-qa2: bAbI (en-valid-qa2) + en-valid-qa20: bAbI (en-valid-qa20) + en-valid-qa3: bAbI (en-valid-qa3) + en-valid-qa4: bAbI (en-valid-qa4) + en-valid-qa5: bAbI (en-valid-qa5) + en-valid-qa6: bAbI (en-valid-qa6) + en-valid-qa7: bAbI (en-valid-qa7) + en-valid-qa8: bAbI (en-valid-qa8) + en-valid-qa9: bAbI (en-valid-qa9) + hn-10k-qa1: bAbI (hn-10k-qa1) + hn-10k-qa10: bAbI (hn-10k-qa10) + hn-10k-qa11: bAbI (hn-10k-qa11) + hn-10k-qa12: bAbI (hn-10k-qa12) + hn-10k-qa13: bAbI (hn-10k-qa13) + hn-10k-qa14: bAbI (hn-10k-qa14) + hn-10k-qa15: bAbI (hn-10k-qa15) + hn-10k-qa16: bAbI (hn-10k-qa16) + hn-10k-qa17: bAbI (hn-10k-qa17) + hn-10k-qa18: bAbI (hn-10k-qa18) + hn-10k-qa19: bAbI (hn-10k-qa19) + hn-10k-qa2: bAbI (hn-10k-qa2) + hn-10k-qa20: bAbI (hn-10k-qa20) + hn-10k-qa3: bAbI (hn-10k-qa3) + hn-10k-qa4: bAbI (hn-10k-qa4) + hn-10k-qa5: bAbI (hn-10k-qa5) + hn-10k-qa6: bAbI (hn-10k-qa6) + hn-10k-qa7: bAbI (hn-10k-qa7) + hn-10k-qa8: bAbI (hn-10k-qa8) + hn-10k-qa9: bAbI (hn-10k-qa9) + hn-qa1: bAbI (hn-qa1) + hn-qa10: bAbI (hn-qa10) + hn-qa11: bAbI (hn-qa11) + hn-qa12: bAbI (hn-qa12) + hn-qa13: bAbI (hn-qa13) + hn-qa14: bAbI (hn-qa14) + hn-qa15: bAbI (hn-qa15) + hn-qa16: bAbI (hn-qa16) + hn-qa17: bAbI (hn-qa17) + hn-qa18: bAbI (hn-qa18) + hn-qa19: bAbI (hn-qa19) + hn-qa2: bAbI (hn-qa2) + hn-qa20: bAbI (hn-qa20) + hn-qa3: bAbI (hn-qa3) + hn-qa4: bAbI (hn-qa4) + hn-qa5: bAbI (hn-qa5) + hn-qa6: bAbI (hn-qa6) + hn-qa7: bAbI (hn-qa7) + hn-qa8: bAbI (hn-qa8) + hn-qa9: bAbI (hn-qa9) + shuffled-10k-qa1: bAbI (shuffled-10k-qa1) + shuffled-10k-qa10: bAbI (shuffled-10k-qa10) + shuffled-10k-qa11: bAbI (shuffled-10k-qa11) + shuffled-10k-qa12: bAbI (shuffled-10k-qa12) + shuffled-10k-qa13: bAbI (shuffled-10k-qa13) + shuffled-10k-qa14: bAbI (shuffled-10k-qa14) + shuffled-10k-qa15: bAbI (shuffled-10k-qa15) + shuffled-10k-qa16: bAbI (shuffled-10k-qa16) + shuffled-10k-qa17: bAbI (shuffled-10k-qa17) + shuffled-10k-qa18: bAbI (shuffled-10k-qa18) + shuffled-10k-qa19: bAbI (shuffled-10k-qa19) + shuffled-10k-qa2: bAbI (shuffled-10k-qa2) + shuffled-10k-qa20: bAbI (shuffled-10k-qa20) + shuffled-10k-qa3: bAbI (shuffled-10k-qa3) + shuffled-10k-qa4: bAbI (shuffled-10k-qa4) + shuffled-10k-qa5: bAbI (shuffled-10k-qa5) + shuffled-10k-qa6: bAbI (shuffled-10k-qa6) + shuffled-10k-qa7: bAbI (shuffled-10k-qa7) + shuffled-10k-qa8: bAbI (shuffled-10k-qa8) + shuffled-10k-qa9: bAbI (shuffled-10k-qa9) + shuffled-qa1: bAbI (shuffled-qa1) + shuffled-qa10: bAbI (shuffled-qa10) + shuffled-qa11: bAbI (shuffled-qa11) + shuffled-qa12: bAbI (shuffled-qa12) + shuffled-qa13: bAbI (shuffled-qa13) + shuffled-qa14: bAbI (shuffled-qa14) + shuffled-qa15: bAbI (shuffled-qa15) + shuffled-qa16: bAbI (shuffled-qa16) + shuffled-qa17: bAbI (shuffled-qa17) + shuffled-qa18: bAbI (shuffled-qa18) + shuffled-qa19: bAbI (shuffled-qa19) + shuffled-qa2: bAbI (shuffled-qa2) + shuffled-qa20: bAbI (shuffled-qa20) + shuffled-qa3: bAbI (shuffled-qa3) + shuffled-qa4: bAbI (shuffled-qa4) + shuffled-qa5: bAbI (shuffled-qa5) + shuffled-qa6: bAbI (shuffled-qa6) + shuffled-qa7: bAbI (shuffled-qa7) + shuffled-qa8: bAbI (shuffled-qa8) + shuffled-qa9: bAbI (shuffled-qa9) --- diff --git a/datasets/banking77/README.md b/datasets/banking77/README.md index a786fd2436f..f40c98d0ce3 100644 --- a/datasets/banking77/README.md +++ b/datasets/banking77/README.md @@ -21,6 +21,7 @@ task_ids: - intent-classification - multi-class-classification paperswithcode_id: null +pretty_name: BANKING77 --- # Dataset Card for BANKING77 diff --git a/datasets/bbaw_egyptian/README.md b/datasets/bbaw_egyptian/README.md index 475a218fdc9..a34c9e2610e 100644 --- a/datasets/bbaw_egyptian/README.md +++ b/datasets/bbaw_egyptian/README.md @@ -20,6 +20,7 @@ task_categories: task_ids: - machine-translation paperswithcode_id: null +pretty_name: BbawEgyptian --- # Dataset Card for "bbaw_egyptian" diff --git a/datasets/bbc_hindi_nli/README.md b/datasets/bbc_hindi_nli/README.md index 664f4cbc02f..0bf0320a058 100644 --- a/datasets/bbc_hindi_nli/README.md +++ b/datasets/bbc_hindi_nli/README.md @@ -18,6 +18,7 @@ task_categories: task_ids: - natural-language-inference paperswithcode_id: null +pretty_name: BBC Hindi NLI Dataset --- # Dataset Card for BBC Hindi NLI Dataset diff --git a/datasets/bc2gm_corpus/README.md b/datasets/bc2gm_corpus/README.md index b8d35d271db..d8b3d9670fa 100644 --- a/datasets/bc2gm_corpus/README.md +++ b/datasets/bc2gm_corpus/README.md @@ -18,6 +18,7 @@ task_categories: task_ids: - named-entity-recognition paperswithcode_id: null +pretty_name: Bc2GmCorpus --- # Dataset Card for bc2gm_corpus @@ -142,4 +143,4 @@ paperswithcode_id: null ### Contributions -Thanks to [@mahajandiwakar](https://github.com/mahajandiwakar) for adding this dataset. \ No newline at end of file +Thanks to [@mahajandiwakar](https://github.com/mahajandiwakar) for adding this dataset. diff --git a/datasets/best2009/README.md b/datasets/best2009/README.md index a161de52fca..026633232b6 100644 --- a/datasets/best2009/README.md +++ b/datasets/best2009/README.md @@ -18,6 +18,7 @@ task_categories: task_ids: - structure-prediction-other-word-tokenization paperswithcode_id: null +pretty_name: best2009 --- # Dataset Card for `best2009` @@ -186,4 +187,4 @@ Character type features: ### Contributions -Thanks to [@cstorm125](https://github.com/cstorm125) for adding this dataset. \ No newline at end of file +Thanks to [@cstorm125](https://github.com/cstorm125) for adding this dataset. diff --git a/datasets/bianet/README.md b/datasets/bianet/README.md index 75e8b0772b4..0dad503450e 100644 --- a/datasets/bianet/README.md +++ b/datasets/bianet/README.md @@ -31,6 +31,10 @@ task_categories: task_ids: - machine-translation paperswithcode_id: bianet +pretty_name: + en_to_ku: Bianet (en_to_ku) + en_to_tr: Bianet (en_to_tr) + ku_to_tr: Bianet (ku_to_tr) --- # Dataset Card for [Dataset Name] @@ -172,4 +176,4 @@ CC-BY-SA-4.0 ### Contributions -Thanks to [@param087](https://github.com/param087) for adding this dataset. \ No newline at end of file +Thanks to [@param087](https://github.com/param087) for adding this dataset. diff --git a/datasets/bible_para/README.md b/datasets/bible_para/README.md index 41955d4b754..99ae51adfc8 100644 --- a/datasets/bible_para/README.md +++ b/datasets/bible_para/README.md @@ -119,6 +119,13 @@ task_categories: task_ids: - machine-translation paperswithcode_id: null +pretty_name: + de-en: BiblePara (de-en) + en-es: BiblePara (en-es) + en-fi: BiblePara (en-fi) + en-fr: BiblePara (en-fr) + en-hi: BiblePara (en-hi) + en-no: BiblePara (en-no) --- # Dataset Card Creation Guide @@ -251,4 +258,4 @@ Here are some examples of questions and facts: ### Contributions -Thanks to [@abhishekkrthakur](https://github.com/abhishekkrthakur) for adding this dataset. \ No newline at end of file +Thanks to [@abhishekkrthakur](https://github.com/abhishekkrthakur) for adding this dataset. diff --git a/datasets/big_patent/README.md b/datasets/big_patent/README.md index 684ed722a29..4d87e0a0bfa 100644 --- a/datasets/big_patent/README.md +++ b/datasets/big_patent/README.md @@ -37,6 +37,17 @@ task_categories: task_ids: - summarization paperswithcode_id: bigpatent +pretty_name: + a: Big Patent (a) + all: Big Patent (all) + b: Big Patent (b) + c: Big Patent (c) + d: Big Patent (d) + e: Big Patent (e) + f: Big Patent (f) + g: Big Patent (g) + h: Big Patent (h) + y: Big Patent (y) --- # Dataset Card for Big Patent @@ -170,4 +181,4 @@ Each instance contains a pair of `description` and `abstract`. `description` is ### Contributions -Thanks to [@mattbui](https://github.com/mattbui) for adding this dataset. \ No newline at end of file +Thanks to [@mattbui](https://github.com/mattbui) for adding this dataset. diff --git a/datasets/billsum/README.md b/datasets/billsum/README.md index 15fc049d932..694d095cac7 100644 --- a/datasets/billsum/README.md +++ b/datasets/billsum/README.md @@ -18,6 +18,7 @@ task_categories: task_ids: - summarization paperswithcode_id: billsum +pretty_name: BillSum --- # Dataset Card for "billsum" @@ -184,4 +185,4 @@ The data fields are the same among all splits. ### Contributions -Thanks to [@thomwolf](https://github.com/thomwolf), [@jplu](https://github.com/jplu), [@lewtun](https://github.com/lewtun) for adding this dataset. \ No newline at end of file +Thanks to [@thomwolf](https://github.com/thomwolf), [@jplu](https://github.com/jplu), [@lewtun](https://github.com/lewtun) for adding this dataset. diff --git a/datasets/bing_coronavirus_query_set/README.md b/datasets/bing_coronavirus_query_set/README.md index 61dcd7fc4a0..f96ae1ae61d 100644 --- a/datasets/bing_coronavirus_query_set/README.md +++ b/datasets/bing_coronavirus_query_set/README.md @@ -18,6 +18,7 @@ task_categories: task_ids: - intent-classification paperswithcode_id: null +pretty_name: BingCoronavirusQuerySet --- # Dataset Card Creation Guide @@ -153,4 +154,4 @@ You can also load the data by country by using `queries_by="country"`. ### Contributions -Thanks to [@abhishekkrthakur](https://github.com/abhishekkrthakur) for adding this dataset. \ No newline at end of file +Thanks to [@abhishekkrthakur](https://github.com/abhishekkrthakur) for adding this dataset.