huggingface · lhoestq · Jun 21, 2021 · Jun 18, 2021 · Jun 18, 2021 · Jun 18, 2021
diff --git a/datasets/emotion/README.md b/datasets/emotion/README.md
@@ -1,6 +1,24 @@
 ---
+pretty_name: Emotion
+annotations_creators:
+- machine-generated
+language_creators:
+- machine-generated
 languages:
 - en
+licenses:
+- unknown
+multilinguality:
+- monolingual
+size_categories:
+- 10K<n<100K
+source_datasets:
+- original
+task_categories:
+- text-classification
+task_ids:
+- multi-class-classification
+- text-classification-other-emotion-classification
 paperswithcode_id: emotion
 ---
 
@@ -97,10 +115,10 @@ The data fields are the same among all splits.
 
 ### Data Splits
 
-| name  |train|validation|test|
-|-------|----:|---------:|---:|
-|default|16000|      2000|2000|
-|emotion|16000|      2000|2000|
+| name    | train | validation | test |
+| ------- | ----: | ---------: | ---: |
+| default | 16000 |       2000 | 2000 |
+| emotion | 16000 |       2000 | 2000 |
 
 ## Dataset Creation
 
@@ -182,4 +200,4 @@ The data fields are the same among all splits.
 
 ### Contributions
 
-Thanks to [@lhoestq](https://github.com/lhoestq), [@thomwolf](https://github.com/thomwolf), [@lewtun](https://github.com/lewtun) for adding this dataset.
+Thanks to [@lhoestq](https://github.com/lhoestq), [@thomwolf](https://github.com/thomwolf), [@lewtun](https://github.com/lewtun) for adding this dataset.
diff --git a/datasets/emotion/dataset_infos.json b/datasets/emotion/dataset_infos.json
@@ -1 +1 @@
-{"emotion":{"description":"Emotion is a dataset of English Twitter messages with six basic emotions: anger, fear, joy, love, sadness, and surprise. For more detailed information please refer to the\npaper.\n","citation":"@inproceedings{saravia-etal-2018-carer,\n    title = \"{CARER}: Contextualized Affect Representations for Emotion Recognition\",\n    author = \"Saravia, Elvis  and\n      Liu, Hsien-Chi Toby  and\n      Huang, Yen-Hao  and\n      Wu, Junlin  and\n      Chen, Yi-Shin\",\n    booktitle = \"Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing\",\n    month = oct # \"-\" # nov,\n    year = \"2018\",\n    address = \"Brussels, Belgium\",\n    publisher = \"Association for Computational Linguistics\",\n    url = \"https://www.aclweb.org/anthology/D18-1404\",\n    doi = \"10.18653/v1/D18-1404\",\n    pages = \"3687--3697\",\n    abstract = \"Emotions are expressed in nuanced ways, which varies by collective or individual experiences, knowledge, and beliefs. Therefore, to understand emotion, as conveyed through text, a robust mechanism capable of capturing and modeling different linguistic nuances and phenomena is needed. We propose a semi-supervised, graph-based algorithm to produce rich structural descriptors which serve as the building blocks for constructing contextualized affect representations from text. The pattern-based representations are further enriched with word embeddings and evaluated through several emotion recognition tasks. Our experimental results demonstrate that the proposed method outperforms state-of-the-art techniques on emotion recognition tasks.\",\n}\n","homepage":"https://github.com/dair-ai/emotion_dataset","license":"","features":{"text":{"dtype":"string","id":null,"_type":"Value"},"label":{"dtype":"string","id":null,"_type":"Value"}},"post_processed":null,"supervised_keys":null,"builder_name":"emotion","config_name":"emotion","version":{"version_str":"0.1.0","description":"First Emotion release","major":0,"minor":1,"patch":0},"splits":{"train":{"name":"train","num_bytes":1754632,"num_examples":16000,"dataset_name":"emotion"},"validation":{"name":"validation","num_bytes":216248,"num_examples":2000,"dataset_name":"emotion"},"test":{"name":"test","num_bytes":218768,"num_examples":2000,"dataset_name":"emotion"}},"download_checksums":{"https://www.dropbox.com/s/1pzkadrvffbqw6o/train.txt?dl=1":{"num_bytes":1658616,"checksum":"3ab03d945a6cb783d818ccd06dafd52d2ed8b4f62f0f85a09d7d11870865b190"},"https://www.dropbox.com/s/2mzialpsgf9k5l3/val.txt?dl=1":{"num_bytes":204240,"checksum":"34faaa31962fe63cdf5dbf6c132ef8ab166c640254ab991af78f3aea375e79ef"},"https://www.dropbox.com/s/ikkqxfdbdec3fuj/test.txt?dl=1":{"num_bytes":206760,"checksum":"60f531690d20127339e7f054edc299a82c627b5ec0dd5d552d53d544e0cfcc17"}},"download_size":2069616,"post_processing_size":null,"dataset_size":2189648,"size_in_bytes":4259264},"default":{"description":"Emotion is a dataset of English Twitter messages with six basic emotions: anger, fear, joy, love, sadness, and surprise. For more detailed information please refer to the paper.\n","citation":"@inproceedings{saravia-etal-2018-carer,\n    title = \"{CARER}: Contextualized Affect Representations for Emotion Recognition\",\n    author = \"Saravia, Elvis  and\n      Liu, Hsien-Chi Toby  and\n      Huang, Yen-Hao  and\n      Wu, Junlin  and\n      Chen, Yi-Shin\",\n    booktitle = \"Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing\",\n    month = oct # \"-\" # nov,\n    year = \"2018\",\n    address = \"Brussels, Belgium\",\n    publisher = \"Association for Computational Linguistics\",\n    url = \"https://www.aclweb.org/anthology/D18-1404\",\n    doi = \"10.18653/v1/D18-1404\",\n    pages = \"3687--3697\",\n    abstract = \"Emotions are expressed in nuanced ways, which varies by collective or individual experiences, knowledge, and beliefs. Therefore, to understand emotion, as conveyed through text, a robust mechanism capable of capturing and modeling different linguistic nuances and phenomena is needed. We propose a semi-supervised, graph-based algorithm to produce rich structural descriptors which serve as the building blocks for constructing contextualized affect representations from text. The pattern-based representations are further enriched with word embeddings and evaluated through several emotion recognition tasks. Our experimental results demonstrate that the proposed method outperforms state-of-the-art techniques on emotion recognition tasks.\",\n}\n","homepage":"https://github.com/dair-ai/emotion_dataset","license":"","features":{"text":{"dtype":"string","id":null,"_type":"Value"},"label":{"num_classes":6,"names":["sadness","joy","love","anger","fear","surprise"],"names_file":null,"id":null,"_type":"ClassLabel"}},"post_processed":null,"supervised_keys":{"input":"text","output":"label"},"builder_name":"emotion","config_name":"default","version":{"version_str":"0.0.0","description":null,"major":0,"minor":0,"patch":0},"splits":{"train":{"name":"train","num_bytes":1741541,"num_examples":16000,"dataset_name":"emotion"},"validation":{"name":"validation","num_bytes":214699,"num_examples":2000,"dataset_name":"emotion"},"test":{"name":"test","num_bytes":217177,"num_examples":2000,"dataset_name":"emotion"}},"download_checksums":{"https://www.dropbox.com/s/1pzkadrvffbqw6o/train.txt?dl=1":{"num_bytes":1658616,"checksum":"3ab03d945a6cb783d818ccd06dafd52d2ed8b4f62f0f85a09d7d11870865b190"},"https://www.dropbox.com/s/2mzialpsgf9k5l3/val.txt?dl=1":{"num_bytes":204240,"checksum":"34faaa31962fe63cdf5dbf6c132ef8ab166c640254ab991af78f3aea375e79ef"},"https://www.dropbox.com/s/ikkqxfdbdec3fuj/test.txt?dl=1":{"num_bytes":206760,"checksum":"60f531690d20127339e7f054edc299a82c627b5ec0dd5d552d53d544e0cfcc17"}},"download_size":2069616,"post_processing_size":null,"dataset_size":2173417,"size_in_bytes":4243033}}
+{"default": {"description": "Emotion is a dataset of English Twitter messages with six basic emotions: anger, fear, joy, love, sadness, and surprise. For more detailed information please refer to the paper.\n", "citation": "@inproceedings{saravia-etal-2018-carer,\n    title = \"{CARER}: Contextualized Affect Representations for Emotion Recognition\",\n    author = \"Saravia, Elvis  and\n      Liu, Hsien-Chi Toby  and\n      Huang, Yen-Hao  and\n      Wu, Junlin  and\n      Chen, Yi-Shin\",\n    booktitle = \"Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing\",\n    month = oct # \"-\" # nov,\n    year = \"2018\",\n    address = \"Brussels, Belgium\",\n    publisher = \"Association for Computational Linguistics\",\n    url = \"https://www.aclweb.org/anthology/D18-1404\",\n    doi = \"10.18653/v1/D18-1404\",\n    pages = \"3687--3697\",\n    abstract = \"Emotions are expressed in nuanced ways, which varies by collective or individual experiences, knowledge, and beliefs. Therefore, to understand emotion, as conveyed through text, a robust mechanism capable of capturing and modeling different linguistic nuances and phenomena is needed. We propose a semi-supervised, graph-based algorithm to produce rich structural descriptors which serve as the building blocks for constructing contextualized affect representations from text. The pattern-based representations are further enriched with word embeddings and evaluated through several emotion recognition tasks. Our experimental results demonstrate that the proposed method outperforms state-of-the-art techniques on emotion recognition tasks.\",\n}\n", "homepage": "https://github.com/dair-ai/emotion_dataset", "license": "", "features": {"text": {"dtype": "string", "id": null, "_type": "Value"}, "label": {"num_classes": 6, "names": ["sadness", "joy", "love", "anger", "fear", "surprise"], "names_file": null, "id": null, "_type": "ClassLabel"}}, "post_processed": null, "supervised_keys": {"input": "text", "output": "label"}, "task_templates": [{"task": "text-classification", "text_column": "text", "label_column": "label", "labels": ["anger", "fear", "joy", "love", "sadness", "surprise"]}], "builder_name": "emotion", "config_name": "default", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1741541, "num_examples": 16000, "dataset_name": "emotion"}, "validation": {"name": "validation", "num_bytes": 214699, "num_examples": 2000, "dataset_name": "emotion"}, "test": {"name": "test", "num_bytes": 217177, "num_examples": 2000, "dataset_name": "emotion"}}, "download_checksums": {"https://www.dropbox.com/s/1pzkadrvffbqw6o/train.txt?dl=1": {"num_bytes": 1658616, "checksum": "3ab03d945a6cb783d818ccd06dafd52d2ed8b4f62f0f85a09d7d11870865b190"}, "https://www.dropbox.com/s/2mzialpsgf9k5l3/val.txt?dl=1": {"num_bytes": 204240, "checksum": "34faaa31962fe63cdf5dbf6c132ef8ab166c640254ab991af78f3aea375e79ef"}, "https://www.dropbox.com/s/ikkqxfdbdec3fuj/test.txt?dl=1": {"num_bytes": 206760, "checksum": "60f531690d20127339e7f054edc299a82c627b5ec0dd5d552d53d544e0cfcc17"}}, "download_size": 2069616, "post_processing_size": null, "dataset_size": 2173417, "size_in_bytes": 4243033}}
diff --git a/datasets/emotion/emotion.py b/datasets/emotion/emotion.py
@@ -1,6 +1,7 @@
 import csv
 
 import datasets
+from datasets.tasks import TextClassification
 
 
 _CITATION = """\
@@ -44,6 +45,7 @@ def _info(self):
             supervised_keys=("text", "label"),
             homepage=_URL,
             citation=_CITATION,
+            task_templates=[TextClassification(text_column="text", label_column="label")],
         )
 
     def _split_generators(self, dl_manager):
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		{"emotion":{"description":"Emotion is a dataset of English Twitter messages with six basic emotions: anger, fear, joy, love, sadness, and surprise. For more detailed information please refer to the\npaper.\n","citation":"@inproceedings{saravia-etal-2018-carer,\n title = \"{CARER}: Contextualized Affect Representations for Emotion Recognition\",\n author = \"Saravia, Elvis and\n Liu, Hsien-Chi Toby and\n Huang, Yen-Hao and\n Wu, Junlin and\n Chen, Yi-Shin\",\n booktitle = \"Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing\",\n month = oct # \"-\" # nov,\n year = \"2018\",\n address = \"Brussels, Belgium\",\n publisher = \"Association for Computational Linguistics\",\n url = \"https://www.aclweb.org/anthology/D18-1404\",\n doi = \"10.18653/v1/D18-1404\",\n pages = \"3687--3697\",\n abstract = \"Emotions are expressed in nuanced ways, which varies by collective or individual experiences, knowledge, and beliefs. Therefore, to understand emotion, as conveyed through text, a robust mechanism capable of capturing and modeling different linguistic nuances and phenomena is needed. We propose a semi-supervised, graph-based algorithm to produce rich structural descriptors which serve as the building blocks for constructing contextualized affect representations from text. The pattern-based representations are further enriched with word embeddings and evaluated through several emotion recognition tasks. Our experimental results demonstrate that the proposed method outperforms state-of-the-art techniques on emotion recognition tasks.\",\n}\n","homepage":"https://github.com/dair-ai/emotion_dataset","license":"","features":{"text":{"dtype":"string","id":null,"_type":"Value"},"label":{"dtype":"string","id":null,"_type":"Value"}},"post_processed":null,"supervised_keys":null,"builder_name":"emotion","config_name":"emotion","version":{"version_str":"0.1.0","description":"First Emotion release","major":0,"minor":1,"patch":0},"splits":{"train":{"name":"train","num_bytes":1754632,"num_examples":16000,"dataset_name":"emotion"},"validation":{"name":"validation","num_bytes":216248,"num_examples":2000,"dataset_name":"emotion"},"test":{"name":"test","num_bytes":218768,"num_examples":2000,"dataset_name":"emotion"}},"download_checksums":{"https://www.dropbox.com/s/1pzkadrvffbqw6o/train.txt?dl=1":{"num_bytes":1658616,"checksum":"3ab03d945a6cb783d818ccd06dafd52d2ed8b4f62f0f85a09d7d11870865b190"},"https://www.dropbox.com/s/2mzialpsgf9k5l3/val.txt?dl=1":{"num_bytes":204240,"checksum":"34faaa31962fe63cdf5dbf6c132ef8ab166c640254ab991af78f3aea375e79ef"},"https://www.dropbox.com/s/ikkqxfdbdec3fuj/test.txt?dl=1":{"num_bytes":206760,"checksum":"60f531690d20127339e7f054edc299a82c627b5ec0dd5d552d53d544e0cfcc17"}},"download_size":2069616,"post_processing_size":null,"dataset_size":2189648,"size_in_bytes":4259264},"default":{"description":"Emotion is a dataset of English Twitter messages with six basic emotions: anger, fear, joy, love, sadness, and surprise. For more detailed information please refer to the paper.\n","citation":"@inproceedings{saravia-etal-2018-carer,\n title = \"{CARER}: Contextualized Affect Representations for Emotion Recognition\",\n author = \"Saravia, Elvis and\n Liu, Hsien-Chi Toby and\n Huang, Yen-Hao and\n Wu, Junlin and\n Chen, Yi-Shin\",\n booktitle = \"Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing\",\n month = oct # \"-\" # nov,\n year = \"2018\",\n address = \"Brussels, Belgium\",\n publisher = \"Association for Computational Linguistics\",\n url = \"https://www.aclweb.org/anthology/D18-1404\",\n doi = \"10.18653/v1/D18-1404\",\n pages = \"3687--3697\",\n abstract = \"Emotions are expressed in nuanced ways, which varies by collective or individual experiences, knowledge, and beliefs. Therefore, to understand emotion, as conveyed through text, a robust mechanism capable of capturing and modeling different linguistic nuances and phenomena is needed. We propose a semi-supervised, graph-based algorithm to produce rich structural descriptors which serve as the building blocks for constructing contextualized affect representations from text. The pattern-based representations are further enriched with word embeddings and evaluated through several emotion recognition tasks. Our experimental results demonstrate that the proposed method outperforms state-of-the-art techniques on emotion recognition tasks.\",\n}\n","homepage":"https://github.com/dair-ai/emotion_dataset","license":"","features":{"text":{"dtype":"string","id":null,"_type":"Value"},"label":{"num_classes":6,"names":["sadness","joy","love","anger","fear","surprise"],"names_file":null,"id":null,"_type":"ClassLabel"}},"post_processed":null,"supervised_keys":{"input":"text","output":"label"},"builder_name":"emotion","config_name":"default","version":{"version_str":"0.0.0","description":null,"major":0,"minor":0,"patch":0},"splits":{"train":{"name":"train","num_bytes":1741541,"num_examples":16000,"dataset_name":"emotion"},"validation":{"name":"validation","num_bytes":214699,"num_examples":2000,"dataset_name":"emotion"},"test":{"name":"test","num_bytes":217177,"num_examples":2000,"dataset_name":"emotion"}},"download_checksums":{"https://www.dropbox.com/s/1pzkadrvffbqw6o/train.txt?dl=1":{"num_bytes":1658616,"checksum":"3ab03d945a6cb783d818ccd06dafd52d2ed8b4f62f0f85a09d7d11870865b190"},"https://www.dropbox.com/s/2mzialpsgf9k5l3/val.txt?dl=1":{"num_bytes":204240,"checksum":"34faaa31962fe63cdf5dbf6c132ef8ab166c640254ab991af78f3aea375e79ef"},"https://www.dropbox.com/s/ikkqxfdbdec3fuj/test.txt?dl=1":{"num_bytes":206760,"checksum":"60f531690d20127339e7f054edc299a82c627b5ec0dd5d552d53d544e0cfcc17"}},"download_size":2069616,"post_processing_size":null,"dataset_size":2173417,"size_in_bytes":4243033}}
		{"default": {"description": "Emotion is a dataset of English Twitter messages with six basic emotions: anger, fear, joy, love, sadness, and surprise. For more detailed information please refer to the paper.\n", "citation": "@inproceedings{saravia-etal-2018-carer,\n title = \"{CARER}: Contextualized Affect Representations for Emotion Recognition\",\n author = \"Saravia, Elvis and\n Liu, Hsien-Chi Toby and\n Huang, Yen-Hao and\n Wu, Junlin and\n Chen, Yi-Shin\",\n booktitle = \"Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing\",\n month = oct # \"-\" # nov,\n year = \"2018\",\n address = \"Brussels, Belgium\",\n publisher = \"Association for Computational Linguistics\",\n url = \"https://www.aclweb.org/anthology/D18-1404\",\n doi = \"10.18653/v1/D18-1404\",\n pages = \"3687--3697\",\n abstract = \"Emotions are expressed in nuanced ways, which varies by collective or individual experiences, knowledge, and beliefs. Therefore, to understand emotion, as conveyed through text, a robust mechanism capable of capturing and modeling different linguistic nuances and phenomena is needed. We propose a semi-supervised, graph-based algorithm to produce rich structural descriptors which serve as the building blocks for constructing contextualized affect representations from text. The pattern-based representations are further enriched with word embeddings and evaluated through several emotion recognition tasks. Our experimental results demonstrate that the proposed method outperforms state-of-the-art techniques on emotion recognition tasks.\",\n}\n", "homepage": "https://github.com/dair-ai/emotion_dataset", "license": "", "features": {"text": {"dtype": "string", "id": null, "_type": "Value"}, "label": {"num_classes": 6, "names": ["sadness", "joy", "love", "anger", "fear", "surprise"], "names_file": null, "id": null, "_type": "ClassLabel"}}, "post_processed": null, "supervised_keys": {"input": "text", "output": "label"}, "task_templates": [{"task": "text-classification", "text_column": "text", "label_column": "label", "labels": ["anger", "fear", "joy", "love", "sadness", "surprise"]}], "builder_name": "emotion", "config_name": "default", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1741541, "num_examples": 16000, "dataset_name": "emotion"}, "validation": {"name": "validation", "num_bytes": 214699, "num_examples": 2000, "dataset_name": "emotion"}, "test": {"name": "test", "num_bytes": 217177, "num_examples": 2000, "dataset_name": "emotion"}}, "download_checksums": {"https://www.dropbox.com/s/1pzkadrvffbqw6o/train.txt?dl=1": {"num_bytes": 1658616, "checksum": "3ab03d945a6cb783d818ccd06dafd52d2ed8b4f62f0f85a09d7d11870865b190"}, "https://www.dropbox.com/s/2mzialpsgf9k5l3/val.txt?dl=1": {"num_bytes": 204240, "checksum": "34faaa31962fe63cdf5dbf6c132ef8ab166c640254ab991af78f3aea375e79ef"}, "https://www.dropbox.com/s/ikkqxfdbdec3fuj/test.txt?dl=1": {"num_bytes": 206760, "checksum": "60f531690d20127339e7f054edc299a82c627b5ec0dd5d552d53d544e0cfcc17"}}, "download_size": 2069616, "post_processing_size": null, "dataset_size": 2173417, "size_in_bytes": 4243033}}