lewtun
diff --git a/datasets/ag_news/ag_news.py b/datasets/ag_news/ag_news.py
Lines changed: 0 additions & 6 deletions
diff --git a/datasets/ag_news/dataset_infos.json b/datasets/ag_news/dataset_infos.json
Lines changed: 1 addition & 1 deletion
diff --git a/datasets/ajgt_twitter_ar/ajgt_twitter_ar.py b/datasets/ajgt_twitter_ar/ajgt_twitter_ar.py
Lines changed: 0 additions & 4 deletions
diff --git a/datasets/ajgt_twitter_ar/dataset_infos.json b/datasets/ajgt_twitter_ar/dataset_infos.json
Lines changed: 1 addition & 1 deletion
diff --git a/datasets/allocine/allocine.py b/datasets/allocine/allocine.py
Lines changed: 0 additions & 2 deletions
diff --git a/datasets/allocine/dataset_infos.json b/datasets/allocine/dataset_infos.json
Lines changed: 1 addition & 1 deletion
diff --git a/datasets/banking77/banking77.py b/datasets/banking77/banking77.py
Lines changed: 0 additions & 86 deletions
@@ -20,7 +20,6 @@
 import csv
 
 import datasets
-from datasets.tasks import TextClassification
 
 
 _DESCRIPTION = """\
@@ -67,11 +66,6 @@ def _info(self):
             ),
             homepage="http://groups.di.unipi.it/~gulli/AG_corpus_of_news_articles.html",
             citation=_CITATION,
-            task_templates=[
-                TextClassification(
-                    labels=("Business", "Sci/Tech", "Sports", "World"), text_column="text", label_column="label"
-                )
-            ],
         )
 
     def _split_generators(self, dl_manager):
 
@@ -1 +1 @@
-{"default": {"description": "AG is a collection of more than 1 million news articles. News articles have been\ngathered from more than 2000 news sources by ComeToMyHead in more than 1 year of\nactivity. ComeToMyHead is an academic news search engine which has been running\nsince July, 2004. The dataset is provided by the academic comunity for research\npurposes in data mining (clustering, classification, etc), information retrieval\n(ranking, search, etc), xml, data compression, data streaming, and any other\nnon-commercial activity. For more information, please refer to the link\nhttp://www.di.unipi.it/~gulli/AG_corpus_of_news_articles.html .\n\nThe AG's news topic classification dataset is constructed by Xiang Zhang\n(xiang.zhang@nyu.edu) from the dataset above. It is used as a text\nclassification benchmark in the following paper: Xiang Zhang, Junbo Zhao, Yann\nLeCun. Character-level Convolutional Networks for Text Classification. Advances\nin Neural Information Processing Systems 28 (NIPS 2015).\n", "citation": "@inproceedings{Zhang2015CharacterlevelCN,\n  title={Character-level Convolutional Networks for Text Classification},\n  author={Xiang Zhang and Junbo Jake Zhao and Yann LeCun},\n  booktitle={NIPS},\n  year={2015}\n}\n", "homepage": "http://groups.di.unipi.it/~gulli/AG_corpus_of_news_articles.html", "license": "", "features": {"text": {"dtype": "string", "id": null, "_type": "Value"}, "label": {"num_classes": 4, "names": ["World", "Sports", "Business", "Sci/Tech"], "names_file": null, "id": null, "_type": "ClassLabel"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"labels": ["Business", "Sci/Tech", "Sports", "World"], "text_column": "text", "label_column": "label"}], "builder_name": "ag_news", "config_name": "default", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 29817351, "num_examples": 120000, "dataset_name": "ag_news"}, 
"test": {"name": "test", "num_bytes": 1879478, "num_examples": 7600, "dataset_name": "ag_news"}}, "download_checksums": {"https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/train.csv": {"num_bytes": 29470338, "checksum": "76a0a2d2f92b286371fe4d4044640910a04a803fdd2538e0f3f29a5c6f6b672e"}, "https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/test.csv": {"num_bytes": 1857427, "checksum": "521465c2428ed7f02f8d6db6ffdd4b5447c1c701962353eb2c40d548c3c85699"}}, "download_size": 31327765, "post_processing_size": null, "dataset_size": 31696829, "size_in_bytes": 63024594}}
+{"default": {"description": "AG is a collection of more than 1 million news articles. News articles have been \ngathered from more than 2000 news sources by ComeToMyHead in more than 1 year of \nactivity. ComeToMyHead is an academic news search engine which has been running \nsince July, 2004. The dataset is provided by the academic comunity for research \npurposes in data mining (clustering, classification, etc), information retrieval \n(ranking, search, etc), xml, data compression, data streaming, and any other \nnon-commercial activity. For more information, please refer to the link \nhttp://www.di.unipi.it/~gulli/AG_corpus_of_news_articles.html .\n\nThe AG's news topic classification dataset is constructed by Xiang Zhang \n(xiang.zhang@nyu.edu) from the dataset above. It is used as a text \nclassification benchmark in the following paper: Xiang Zhang, Junbo Zhao, Yann \nLeCun. Character-level Convolutional Networks for Text Classification. Advances \nin Neural Information Processing Systems 28 (NIPS 2015).\n", "citation": "@inproceedings{Zhang2015CharacterlevelCN,\n  title={Character-level Convolutional Networks for Text Classification},\n  author={Xiang Zhang and Junbo Jake Zhao and Yann LeCun},\n  booktitle={NIPS},\n  year={2015}\n}\n", "homepage": "http://groups.di.unipi.it/~gulli/AG_corpus_of_news_articles.html", "license": "", "features": {"text": {"dtype": "string", "id": null, "_type": "Value"}, "label": {"num_classes": 4, "names": ["World", "Sports", "Business", "Sci/Tech"], "names_file": null, "id": null, "_type": "ClassLabel"}}, "supervised_keys": null, "builder_name": "ag_news", "config_name": "default", "version": {"version_str": "0.0.0", "description": null, "datasets_version_to_prepare": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 29817351, "num_examples": 120000, "dataset_name": "ag_news"}, "test": {"name": "test", "num_bytes": 1879478, "num_examples": 7600, "dataset_name": "ag_news"}}, 
"download_checksums": {"https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/train.csv": {"num_bytes": 29470338, "checksum": "76a0a2d2f92b286371fe4d4044640910a04a803fdd2538e0f3f29a5c6f6b672e"}, "https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/test.csv": {"num_bytes": 1857427, "checksum": "521465c2428ed7f02f8d6db6ffdd4b5447c1c701962353eb2c40d548c3c85699"}}, "download_size": 31327765, "dataset_size": 31696829, "size_in_bytes": 63024594}}
@@ -23,7 +23,6 @@
 import pandas as pd
 
 import datasets
-from datasets.tasks import TextClassification
 
 
 _DESCRIPTION = """\
@@ -84,9 +83,6 @@ def _info(self):
             supervised_keys=None,
             homepage="https://github.com/komari6/Arabic-twitter-corpus-AJGT",
             citation=_CITATION,
-            task_templates=[
-                TextClassification(labels=("Negative", "Positive"), text_column="text", label_column="label")
-            ],
         )
 
     def _split_generators(self, dl_manager):
 
@@ -1 +1 @@
-{"plain_text": {"description": "Arabic Jordanian General Tweets (AJGT) Corpus consisted of 1,800 tweets annotated as positive and negative. Modern Standard Arabic (MSA) or Jordanian dialect.\n", "citation": "@inproceedings{alomari2017arabic,\n  title={Arabic tweets sentimental analysis using machine learning},\n  author={Alomari, Khaled Mohammad and ElSherif, Hatem M and Shaalan, Khaled},\n  booktitle={International Conference on Industrial, Engineering and Other Applications of Applied Intelligent Systems},\n  pages={602--610},\n  year={2017},\n  organization={Springer}\n}\n", "homepage": "https://github.com/komari6/Arabic-twitter-corpus-AJGT", "license": "", "features": {"text": {"dtype": "string", "id": null, "_type": "Value"}, "label": {"num_classes": 2, "names": ["Negative", "Positive"], "names_file": null, "id": null, "_type": "ClassLabel"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"labels": ["Negative", "Positive"], "text_column": "text", "label_column": "label"}], "builder_name": "ajgt_twitter_ar", "config_name": "plain_text", "version": {"version_str": "1.0.0", "description": "", "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 175424, "num_examples": 1800, "dataset_name": "ajgt_twitter_ar"}}, "download_checksums": {"https://raw.githubusercontent.com/komari6/Arabic-twitter-corpus-AJGT/master/AJGT.xlsx": {"num_bytes": 107395, "checksum": "966c52213872b6b8a3ced5fb7c60aee2abf47ca673c7d2c2eeb064a60bc9ed51"}}, "download_size": 107395, "post_processing_size": null, "dataset_size": 175424, "size_in_bytes": 282819}}
+{"plain_text": {"description": "Arabic Jordanian General Tweets (AJGT) Corpus consisted of 1,800 tweets annotated as positive and negative. Modern Standard Arabic (MSA) or Jordanian dialect.\n", "citation": "@inproceedings{alomari2017arabic,\n  title={Arabic tweets sentimental analysis using machine learning},\n  author={Alomari, Khaled Mohammad and ElSherif, Hatem M and Shaalan, Khaled},\n  booktitle={International Conference on Industrial, Engineering and Other Applications of Applied Intelligent Systems},\n  pages={602--610},\n  year={2017},\n  organization={Springer}\n}\n", "homepage": "https://github.com/komari6/Arabic-twitter-corpus-AJGT", "license": "", "features": {"text": {"dtype": "string", "id": null, "_type": "Value"}, "label": {"num_classes": 2, "names": ["Negative", "Positive"], "names_file": null, "id": null, "_type": "ClassLabel"}}, "post_processed": null, "supervised_keys": null, "builder_name": "ajgt_twitter_ar", "config_name": "plain_text", "version": {"version_str": "1.0.0", "description": "", "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 175424, "num_examples": 1800, "dataset_name": "ajgt_twitter_ar"}}, "download_checksums": {"https://raw.githubusercontent.com/komari6/Arabic-twitter-corpus-AJGT/master/AJGT.xlsx": {"num_bytes": 107395, "checksum": "966c52213872b6b8a3ced5fb7c60aee2abf47ca673c7d2c2eeb064a60bc9ed51"}}, "download_size": 107395, "post_processing_size": null, "dataset_size": 175424, "size_in_bytes": 282819}}
@@ -5,7 +5,6 @@
 import os
 
 import datasets
-from datasets.tasks import TextClassification
 
 
 _CITATION = """\
@@ -66,7 +65,6 @@ def _info(self):
             supervised_keys=None,
             homepage="https://github.com/TheophileBlard/french-sentiment-analysis-with-bert",
             citation=_CITATION,
-            task_templates=[TextClassification(labels=("neg", "pos"), text_column="review", label_column="label")],
         )
 
     def _split_generators(self, dl_manager):
 
@@ -1 +1 @@
-{"allocine": {"description": " Allocine Dataset: A Large-Scale French Movie Reviews Dataset.\n This is a dataset for binary sentiment classification, made of user reviews scraped from Allocine.fr.\n It contains 100k positive and 100k negative reviews divided into 3 balanced splits: train (160k reviews), val (20k) and test (20k).\n", "citation": "@misc{blard2019allocine,\n  author = {Blard, Theophile},\n  title = {french-sentiment-analysis-with-bert},\n  year = {2020},\n  publisher = {GitHub},\n  journal = {GitHub repository},\n  howpublished={\\url{https://github.com/TheophileBlard/french-sentiment-analysis-with-bert}},\n}\n", "homepage": "https://github.com/TheophileBlard/french-sentiment-analysis-with-bert", "license": "", "features": {"review": {"dtype": "string", "id": null, "_type": "Value"}, "label": {"num_classes": 2, "names": ["neg", "pos"], "names_file": null, "id": null, "_type": "ClassLabel"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"labels": ["neg", "pos"], "text_column": "review", "label_column": "label"}], "builder_name": "allocine_dataset", "config_name": "allocine", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 91330696, "num_examples": 160000, "dataset_name": "allocine_dataset"}, "validation": {"name": "validation", "num_bytes": 11546250, "num_examples": 20000, "dataset_name": "allocine_dataset"}, "test": {"name": "test", "num_bytes": 11547697, "num_examples": 20000, "dataset_name": "allocine_dataset"}}, "download_checksums": {"https://github.com/TheophileBlard/french-sentiment-analysis-with-bert/raw/master/allocine_dataset/data.tar.bz2": {"num_bytes": 66625305, "checksum": "8c49a8cac783da201697ed1a91b36d2f6618222b3b7ea1c2996f2a3fbc37dfb4"}}, "download_size": 66625305, "post_processing_size": null, "dataset_size": 114424643, "size_in_bytes": 181049948}}
+{"allocine": {"description": " Allocine Dataset: A Large-Scale French Movie Reviews Dataset.\n This is a dataset for binary sentiment classification, made of user reviews scraped from Allocine.fr.\n It contains 100k positive and 100k negative reviews divided into 3 balanced splits: train (160k reviews), val (20k) and test (20k).\n", "citation": "@misc{blard2019allocine,\n  author = {Blard, Theophile},\n  title = {french-sentiment-analysis-with-bert},\n  year = {2020},\n  publisher = {GitHub},\n  journal = {GitHub repository},\n  howpublished={\\url{https://github.com/TheophileBlard/french-sentiment-analysis-with-bert}},\n}\n", "homepage": "https://github.com/TheophileBlard/french-sentiment-analysis-with-bert", "license": "", "features": {"review": {"dtype": "string", "id": null, "_type": "Value"}, "label": {"num_classes": 2, "names": ["neg", "pos"], "names_file": null, "id": null, "_type": "ClassLabel"}}, "supervised_keys": null, "builder_name": "allocine_dataset", "config_name": "allocine", "version": {"version_str": "1.0.0", "description": null, "datasets_version_to_prepare": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 91330696, "num_examples": 160000, "dataset_name": "allocine_dataset"}, "validation": {"name": "validation", "num_bytes": 11546250, "num_examples": 20000, "dataset_name": "allocine_dataset"}, "test": {"name": "test", "num_bytes": 11547697, "num_examples": 20000, "dataset_name": "allocine_dataset"}}, "download_checksums": {"https://github.com/TheophileBlard/french-sentiment-analysis-with-bert/raw/master/allocine_dataset/data.tar.bz2": {"num_bytes": 66625305, "checksum": "8c49a8cac783da201697ed1a91b36d2f6618222b3b7ea1c2996f2a3fbc37dfb4"}}, "download_size": 66625305, "dataset_size": 114424643, "size_in_bytes": 181049948}}
@@ -18,7 +18,6 @@
 import csv
 
 import datasets
-from datasets.tasks import TextClassification
 
 
 _CITATION = """\
@@ -148,91 +147,6 @@ def _info(self):
             homepage=_HOMEPAGE,
             license=_LICENSE,
             citation=_CITATION,
-            task_templates=[
-                TextClassification(
-                    labels=(
-                        "Refund_not_showing_up",
-                        "activate_my_card",
-                        "age_limit",
-                        "apple_pay_or_google_pay",
-                        "atm_support",
-                        "automatic_top_up",
-                        "balance_not_updated_after_bank_transfer",
-                        "balance_not_updated_after_cheque_or_cash_deposit",
-                        "beneficiary_not_allowed",
-                        "cancel_transfer",
-                        "card_about_to_expire",
-                        "card_acceptance",
-                        "card_arrival",
-                        "card_delivery_estimate",
-                        "card_linking",
-                        "card_not_working",
-                        "card_payment_fee_charged",
-                        "card_payment_not_recognised",
-                        "card_payment_wrong_exchange_rate",
-                        "card_swallowed",
-                        "cash_withdrawal_charge",
-                        "cash_withdrawal_not_recognised",
-                        "change_pin",
-                        "compromised_card",
-                        "contactless_not_working",
-                        "country_support",
-                        "declined_card_payment",
-                        "declined_cash_withdrawal",
-                        "declined_transfer",
-                        "direct_debit_payment_not_recognised",
-                        "disposable_card_limits",
-                        "edit_personal_details",
-                        "exchange_charge",
-                        "exchange_rate",
-                        "exchange_via_app",
-                        "extra_charge_on_statement",
-                        "failed_transfer",
-                        "fiat_currency_support",
-                        "get_disposable_virtual_card",
-                        "get_physical_card",
-                        "getting_spare_card",
-                        "getting_virtual_card",
-                        "lost_or_stolen_card",
-                        "lost_or_stolen_phone",
-                        "order_physical_card",
-                        "passcode_forgotten",
-                        "pending_card_payment",
-                        "pending_cash_withdrawal",
-                        "pending_top_up",
-                        "pending_transfer",
-                        "pin_blocked",
-                        "receiving_money",
-                        "request_refund",
-                        "reverted_card_payment?",
-                        "supported_cards_and_currencies",
-                        "terminate_account",
-                        "top_up_by_bank_transfer_charge",
-                        "top_up_by_card_charge",
-                        "top_up_by_cash_or_cheque",
-                        "top_up_failed",
-                        "top_up_limits",
-                        "top_up_reverted",
-                        "topping_up_by_card",
-                        "transaction_charged_twice",
-                        "transfer_fee_charged",
-                        "transfer_into_account",
-                        "transfer_not_received_by_recipient",
-                        "transfer_timing",
-                        "unable_to_verify_identity",
-                        "verify_my_identity",
-                        "verify_source_of_funds",
-                        "verify_top_up",
-                        "virtual_card_not_working",
-                        "visa_or_mastercard",
-                        "why_verify_identity",
-                        "wrong_amount_of_cash_received",
-                        "wrong_exchange_rate_for_cash_withdrawal",
-                    ),
-                    text_column="text",
-                    label_column="label",
-                )
-            ],
         )
 
     def _split_generators(self, dl_manager):
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		-{"default": {"description": "AG is a collection of more than 1 million news articles. News articles have been\ngathered from more than 2000 news sources by ComeToMyHead in more than 1 year of\nactivity. ComeToMyHead is an academic news search engine which has been running\nsince July, 2004. The dataset is provided by the academic comunity for research\npurposes in data mining (clustering, classification, etc), information retrieval\n(ranking, search, etc), xml, data compression, data streaming, and any other\nnon-commercial activity. For more information, please refer to the link\nhttp://www.di.unipi.it/~gulli/AG_corpus_of_news_articles.html .\n\nThe AG's news topic classification dataset is constructed by Xiang Zhang\n(xiang.zhang@nyu.edu) from the dataset above. It is used as a text\nclassification benchmark in the following paper: Xiang Zhang, Junbo Zhao, Yann\nLeCun. Character-level Convolutional Networks for Text Classification. Advances\nin Neural Information Processing Systems 28 (NIPS 2015).\n", "citation": "@inproceedings{Zhang2015CharacterlevelCN,\n title={Character-level Convolutional Networks for Text Classification},\n author={Xiang Zhang and Junbo Jake Zhao and Yann LeCun},\n booktitle={NIPS},\n year={2015}\n}\n", "homepage": "http://groups.di.unipi.it/~gulli/AG_corpus_of_news_articles.html", "license": "", "features": {"text": {"dtype": "string", "id": null, "_type": "Value"}, "label": {"num_classes": 4, "names": ["World", "Sports", "Business", "Sci/Tech"], "names_file": null, "id": null, "_type": "ClassLabel"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"labels": ["Business", "Sci/Tech", "Sports", "World"], "text_column": "text", "label_column": "label"}], "builder_name": "ag_news", "config_name": "default", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 29817351, "num_examples": 120000, "dataset_name": "ag_news"}, 
"test": {"name": "test", "num_bytes": 1879478, "num_examples": 7600, "dataset_name": "ag_news"}}, "download_checksums": {"https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/train.csv": {"num_bytes": 29470338, "checksum": "76a0a2d2f92b286371fe4d4044640910a04a803fdd2538e0f3f29a5c6f6b672e"}, "https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/test.csv": {"num_bytes": 1857427, "checksum": "521465c2428ed7f02f8d6db6ffdd4b5447c1c701962353eb2c40d548c3c85699"}}, "download_size": 31327765, "post_processing_size": null, "dataset_size": 31696829, "size_in_bytes": 63024594}}
	`1`	+{"default": {"description": "AG is a collection of more than 1 million news articles. News articles have been \ngathered from more than 2000 news sources by ComeToMyHead in more than 1 year of \nactivity. ComeToMyHead is an academic news search engine which has been running \nsince July, 2004. The dataset is provided by the academic comunity for research \npurposes in data mining (clustering, classification, etc), information retrieval \n(ranking, search, etc), xml, data compression, data streaming, and any other \nnon-commercial activity. For more information, please refer to the link \nhttp://www.di.unipi.it/~gulli/AG_corpus_of_news_articles.html .\n\nThe AG's news topic classification dataset is constructed by Xiang Zhang \n(xiang.zhang@nyu.edu) from the dataset above. It is used as a text \nclassification benchmark in the following paper: Xiang Zhang, Junbo Zhao, Yann \nLeCun. Character-level Convolutional Networks for Text Classification. Advances \nin Neural Information Processing Systems 28 (NIPS 2015).\n", "citation": "@inproceedings{Zhang2015CharacterlevelCN,\n title={Character-level Convolutional Networks for Text Classification},\n author={Xiang Zhang and Junbo Jake Zhao and Yann LeCun},\n booktitle={NIPS},\n year={2015}\n}\n", "homepage": "http://groups.di.unipi.it/~gulli/AG_corpus_of_news_articles.html", "license": "", "features": {"text": {"dtype": "string", "id": null, "_type": "Value"}, "label": {"num_classes": 4, "names": ["World", "Sports", "Business", "Sci/Tech"], "names_file": null, "id": null, "_type": "ClassLabel"}}, "supervised_keys": null, "builder_name": "ag_news", "config_name": "default", "version": {"version_str": "0.0.0", "description": null, "datasets_version_to_prepare": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 29817351, "num_examples": 120000, "dataset_name": "ag_news"}, "test": {"name": "test", "num_bytes": 1879478, "num_examples": 7600, "dataset_name": "ag_news"}}, 
"download_checksums": {"https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/train.csv": {"num_bytes": 29470338, "checksum": "76a0a2d2f92b286371fe4d4044640910a04a803fdd2538e0f3f29a5c6f6b672e"}, "https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/test.csv": {"num_bytes": 1857427, "checksum": "521465c2428ed7f02f8d6db6ffdd4b5447c1c701962353eb2c40d548c3c85699"}}, "download_size": 31327765, "dataset_size": 31696829, "size_in_bytes": 63024594}}
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		-{"plain_text": {"description": "Arabic Jordanian General Tweets (AJGT) Corpus consisted of 1,800 tweets annotated as positive and negative. Modern Standard Arabic (MSA) or Jordanian dialect.\n", "citation": "@inproceedings{alomari2017arabic,\n title={Arabic tweets sentimental analysis using machine learning},\n author={Alomari, Khaled Mohammad and ElSherif, Hatem M and Shaalan, Khaled},\n booktitle={International Conference on Industrial, Engineering and Other Applications of Applied Intelligent Systems},\n pages={602--610},\n year={2017},\n organization={Springer}\n}\n", "homepage": "https://github.com/komari6/Arabic-twitter-corpus-AJGT", "license": "", "features": {"text": {"dtype": "string", "id": null, "_type": "Value"}, "label": {"num_classes": 2, "names": ["Negative", "Positive"], "names_file": null, "id": null, "_type": "ClassLabel"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"labels": ["Negative", "Positive"], "text_column": "text", "label_column": "label"}], "builder_name": "ajgt_twitter_ar", "config_name": "plain_text", "version": {"version_str": "1.0.0", "description": "", "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 175424, "num_examples": 1800, "dataset_name": "ajgt_twitter_ar"}}, "download_checksums": {"https://raw.githubusercontent.com/komari6/Arabic-twitter-corpus-AJGT/master/AJGT.xlsx": {"num_bytes": 107395, "checksum": "966c52213872b6b8a3ced5fb7c60aee2abf47ca673c7d2c2eeb064a60bc9ed51"}}, "download_size": 107395, "post_processing_size": null, "dataset_size": 175424, "size_in_bytes": 282819}}
	`1`	+{"plain_text": {"description": "Arabic Jordanian General Tweets (AJGT) Corpus consisted of 1,800 tweets annotated as positive and negative. Modern Standard Arabic (MSA) or Jordanian dialect.\n", "citation": "@inproceedings{alomari2017arabic,\n title={Arabic tweets sentimental analysis using machine learning},\n author={Alomari, Khaled Mohammad and ElSherif, Hatem M and Shaalan, Khaled},\n booktitle={International Conference on Industrial, Engineering and Other Applications of Applied Intelligent Systems},\n pages={602--610},\n year={2017},\n organization={Springer}\n}\n", "homepage": "https://github.com/komari6/Arabic-twitter-corpus-AJGT", "license": "", "features": {"text": {"dtype": "string", "id": null, "_type": "Value"}, "label": {"num_classes": 2, "names": ["Negative", "Positive"], "names_file": null, "id": null, "_type": "ClassLabel"}}, "post_processed": null, "supervised_keys": null, "builder_name": "ajgt_twitter_ar", "config_name": "plain_text", "version": {"version_str": "1.0.0", "description": "", "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 175424, "num_examples": 1800, "dataset_name": "ajgt_twitter_ar"}}, "download_checksums": {"https://raw.githubusercontent.com/komari6/Arabic-twitter-corpus-AJGT/master/AJGT.xlsx": {"num_bytes": 107395, "checksum": "966c52213872b6b8a3ced5fb7c60aee2abf47ca673c7d2c2eeb064a60bc9ed51"}}, "download_size": 107395, "post_processing_size": null, "dataset_size": 175424, "size_in_bytes": 282819}}
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		-{"allocine": {"description": " Allocine Dataset: A Large-Scale French Movie Reviews Dataset.\n This is a dataset for binary sentiment classification, made of user reviews scraped from Allocine.fr.\n It contains 100k positive and 100k negative reviews divided into 3 balanced splits: train (160k reviews), val (20k) and test (20k).\n", "citation": "@misc{blard2019allocine,\n author = {Blard, Theophile},\n title = {french-sentiment-analysis-with-bert},\n year = {2020},\n publisher = {GitHub},\n journal = {GitHub repository},\n howpublished={\\url{https://github.com/TheophileBlard/french-sentiment-analysis-with-bert}},\n}\n", "homepage": "https://github.com/TheophileBlard/french-sentiment-analysis-with-bert", "license": "", "features": {"review": {"dtype": "string", "id": null, "_type": "Value"}, "label": {"num_classes": 2, "names": ["neg", "pos"], "names_file": null, "id": null, "_type": "ClassLabel"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"labels": ["neg", "pos"], "text_column": "review", "label_column": "label"}], "builder_name": "allocine_dataset", "config_name": "allocine", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 91330696, "num_examples": 160000, "dataset_name": "allocine_dataset"}, "validation": {"name": "validation", "num_bytes": 11546250, "num_examples": 20000, "dataset_name": "allocine_dataset"}, "test": {"name": "test", "num_bytes": 11547697, "num_examples": 20000, "dataset_name": "allocine_dataset"}}, "download_checksums": {"https://github.com/TheophileBlard/french-sentiment-analysis-with-bert/raw/master/allocine_dataset/data.tar.bz2": {"num_bytes": 66625305, "checksum": "8c49a8cac783da201697ed1a91b36d2f6618222b3b7ea1c2996f2a3fbc37dfb4"}}, "download_size": 66625305, "post_processing_size": null, "dataset_size": 114424643, "size_in_bytes": 181049948}}
	`1`	+{"allocine": {"description": " Allocine Dataset: A Large-Scale French Movie Reviews Dataset.\n This is a dataset for binary sentiment classification, made of user reviews scraped from Allocine.fr.\n It contains 100k positive and 100k negative reviews divided into 3 balanced splits: train (160k reviews), val (20k) and test (20k).\n", "citation": "@misc{blard2019allocine,\n author = {Blard, Theophile},\n title = {french-sentiment-analysis-with-bert},\n year = {2020},\n publisher = {GitHub},\n journal = {GitHub repository},\n howpublished={\\url{https://github.com/TheophileBlard/french-sentiment-analysis-with-bert}},\n}\n", "homepage": "https://github.com/TheophileBlard/french-sentiment-analysis-with-bert", "license": "", "features": {"review": {"dtype": "string", "id": null, "_type": "Value"}, "label": {"num_classes": 2, "names": ["neg", "pos"], "names_file": null, "id": null, "_type": "ClassLabel"}}, "supervised_keys": null, "builder_name": "allocine_dataset", "config_name": "allocine", "version": {"version_str": "1.0.0", "description": null, "datasets_version_to_prepare": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 91330696, "num_examples": 160000, "dataset_name": "allocine_dataset"}, "validation": {"name": "validation", "num_bytes": 11546250, "num_examples": 20000, "dataset_name": "allocine_dataset"}, "test": {"name": "test", "num_bytes": 11547697, "num_examples": 20000, "dataset_name": "allocine_dataset"}}, "download_checksums": {"https://github.com/TheophileBlard/french-sentiment-analysis-with-bert/raw/master/allocine_dataset/data.tar.bz2": {"num_bytes": 66625305, "checksum": "8c49a8cac783da201697ed1a91b36d2f6618222b3b7ea1c2996f2a3fbc37dfb4"}}, "download_size": 66625305, "dataset_size": 114424643, "size_in_bytes": 181049948}}