Skip to content

Commit 0550298

Browse files
committed
Revert task template insertion to account for API changes in PR huggingface#2392
1 parent ffea11a commit 0550298

86 files changed

Lines changed: 43 additions & 680 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

datasets/ag_news/ag_news.py

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,6 @@
2020
import csv
2121

2222
import datasets
23-
from datasets.tasks import TextClassification
2423

2524

2625
_DESCRIPTION = """\
@@ -67,11 +66,6 @@ def _info(self):
6766
),
6867
homepage="http://groups.di.unipi.it/~gulli/AG_corpus_of_news_articles.html",
6968
citation=_CITATION,
70-
task_templates=[
71-
TextClassification(
72-
labels=("Business", "Sci/Tech", "Sports", "World"), text_column="text", label_column="label"
73-
)
74-
],
7569
)
7670

7771
def _split_generators(self, dl_manager):
Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
{"default": {"description": "AG is a collection of more than 1 million news articles. News articles have been\ngathered from more than 2000 news sources by ComeToMyHead in more than 1 year of\nactivity. ComeToMyHead is an academic news search engine which has been running\nsince July, 2004. The dataset is provided by the academic comunity for research\npurposes in data mining (clustering, classification, etc), information retrieval\n(ranking, search, etc), xml, data compression, data streaming, and any other\nnon-commercial activity. For more information, please refer to the link\nhttp://www.di.unipi.it/~gulli/AG_corpus_of_news_articles.html .\n\nThe AG's news topic classification dataset is constructed by Xiang Zhang\n(xiang.zhang@nyu.edu) from the dataset above. It is used as a text\nclassification benchmark in the following paper: Xiang Zhang, Junbo Zhao, Yann\nLeCun. Character-level Convolutional Networks for Text Classification. Advances\nin Neural Information Processing Systems 28 (NIPS 2015).\n", "citation": "@inproceedings{Zhang2015CharacterlevelCN,\n title={Character-level Convolutional Networks for Text Classification},\n author={Xiang Zhang and Junbo Jake Zhao and Yann LeCun},\n booktitle={NIPS},\n year={2015}\n}\n", "homepage": "http://groups.di.unipi.it/~gulli/AG_corpus_of_news_articles.html", "license": "", "features": {"text": {"dtype": "string", "id": null, "_type": "Value"}, "label": {"num_classes": 4, "names": ["World", "Sports", "Business", "Sci/Tech"], "names_file": null, "id": null, "_type": "ClassLabel"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"labels": ["Business", "Sci/Tech", "Sports", "World"], "text_column": "text", "label_column": "label"}], "builder_name": "ag_news", "config_name": "default", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 29817351, "num_examples": 120000, "dataset_name": "ag_news"}, "test": 
{"name": "test", "num_bytes": 1879478, "num_examples": 7600, "dataset_name": "ag_news"}}, "download_checksums": {"https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/train.csv": {"num_bytes": 29470338, "checksum": "76a0a2d2f92b286371fe4d4044640910a04a803fdd2538e0f3f29a5c6f6b672e"}, "https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/test.csv": {"num_bytes": 1857427, "checksum": "521465c2428ed7f02f8d6db6ffdd4b5447c1c701962353eb2c40d548c3c85699"}}, "download_size": 31327765, "post_processing_size": null, "dataset_size": 31696829, "size_in_bytes": 63024594}}
1+
{"default": {"description": "AG is a collection of more than 1 million news articles. News articles have been \ngathered from more than 2000 news sources by ComeToMyHead in more than 1 year of \nactivity. ComeToMyHead is an academic news search engine which has been running \nsince July, 2004. The dataset is provided by the academic comunity for research \npurposes in data mining (clustering, classification, etc), information retrieval \n(ranking, search, etc), xml, data compression, data streaming, and any other \nnon-commercial activity. For more information, please refer to the link \nhttp://www.di.unipi.it/~gulli/AG_corpus_of_news_articles.html .\n\nThe AG's news topic classification dataset is constructed by Xiang Zhang \n(xiang.zhang@nyu.edu) from the dataset above. It is used as a text \nclassification benchmark in the following paper: Xiang Zhang, Junbo Zhao, Yann \nLeCun. Character-level Convolutional Networks for Text Classification. Advances \nin Neural Information Processing Systems 28 (NIPS 2015).\n", "citation": "@inproceedings{Zhang2015CharacterlevelCN,\n title={Character-level Convolutional Networks for Text Classification},\n author={Xiang Zhang and Junbo Jake Zhao and Yann LeCun},\n booktitle={NIPS},\n year={2015}\n}\n", "homepage": "http://groups.di.unipi.it/~gulli/AG_corpus_of_news_articles.html", "license": "", "features": {"text": {"dtype": "string", "id": null, "_type": "Value"}, "label": {"num_classes": 4, "names": ["World", "Sports", "Business", "Sci/Tech"], "names_file": null, "id": null, "_type": "ClassLabel"}}, "supervised_keys": null, "builder_name": "ag_news", "config_name": "default", "version": {"version_str": "0.0.0", "description": null, "datasets_version_to_prepare": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 29817351, "num_examples": 120000, "dataset_name": "ag_news"}, "test": {"name": "test", "num_bytes": 1879478, "num_examples": 7600, "dataset_name": "ag_news"}}, 
"download_checksums": {"https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/train.csv": {"num_bytes": 29470338, "checksum": "76a0a2d2f92b286371fe4d4044640910a04a803fdd2538e0f3f29a5c6f6b672e"}, "https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/test.csv": {"num_bytes": 1857427, "checksum": "521465c2428ed7f02f8d6db6ffdd4b5447c1c701962353eb2c40d548c3c85699"}}, "download_size": 31327765, "dataset_size": 31696829, "size_in_bytes": 63024594}}

datasets/ajgt_twitter_ar/ajgt_twitter_ar.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
import pandas as pd
2424

2525
import datasets
26-
from datasets.tasks import TextClassification
2726

2827

2928
_DESCRIPTION = """\
@@ -84,9 +83,6 @@ def _info(self):
8483
supervised_keys=None,
8584
homepage="https://github.com/komari6/Arabic-twitter-corpus-AJGT",
8685
citation=_CITATION,
87-
task_templates=[
88-
TextClassification(labels=("Negative", "Positive"), text_column="text", label_column="label")
89-
],
9086
)
9187

9288
def _split_generators(self, dl_manager):
Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
{"plain_text": {"description": "Arabic Jordanian General Tweets (AJGT) Corpus consisted of 1,800 tweets annotated as positive and negative. Modern Standard Arabic (MSA) or Jordanian dialect.\n", "citation": "@inproceedings{alomari2017arabic,\n title={Arabic tweets sentimental analysis using machine learning},\n author={Alomari, Khaled Mohammad and ElSherif, Hatem M and Shaalan, Khaled},\n booktitle={International Conference on Industrial, Engineering and Other Applications of Applied Intelligent Systems},\n pages={602--610},\n year={2017},\n organization={Springer}\n}\n", "homepage": "https://github.com/komari6/Arabic-twitter-corpus-AJGT", "license": "", "features": {"text": {"dtype": "string", "id": null, "_type": "Value"}, "label": {"num_classes": 2, "names": ["Negative", "Positive"], "names_file": null, "id": null, "_type": "ClassLabel"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"labels": ["Negative", "Positive"], "text_column": "text", "label_column": "label"}], "builder_name": "ajgt_twitter_ar", "config_name": "plain_text", "version": {"version_str": "1.0.0", "description": "", "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 175424, "num_examples": 1800, "dataset_name": "ajgt_twitter_ar"}}, "download_checksums": {"https://raw.githubusercontent.com/komari6/Arabic-twitter-corpus-AJGT/master/AJGT.xlsx": {"num_bytes": 107395, "checksum": "966c52213872b6b8a3ced5fb7c60aee2abf47ca673c7d2c2eeb064a60bc9ed51"}}, "download_size": 107395, "post_processing_size": null, "dataset_size": 175424, "size_in_bytes": 282819}}
1+
{"plain_text": {"description": "Arabic Jordanian General Tweets (AJGT) Corpus consisted of 1,800 tweets annotated as positive and negative. Modern Standard Arabic (MSA) or Jordanian dialect.\n", "citation": "@inproceedings{alomari2017arabic,\n title={Arabic tweets sentimental analysis using machine learning},\n author={Alomari, Khaled Mohammad and ElSherif, Hatem M and Shaalan, Khaled},\n booktitle={International Conference on Industrial, Engineering and Other Applications of Applied Intelligent Systems},\n pages={602--610},\n year={2017},\n organization={Springer}\n}\n", "homepage": "https://github.com/komari6/Arabic-twitter-corpus-AJGT", "license": "", "features": {"text": {"dtype": "string", "id": null, "_type": "Value"}, "label": {"num_classes": 2, "names": ["Negative", "Positive"], "names_file": null, "id": null, "_type": "ClassLabel"}}, "post_processed": null, "supervised_keys": null, "builder_name": "ajgt_twitter_ar", "config_name": "plain_text", "version": {"version_str": "1.0.0", "description": "", "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 175424, "num_examples": 1800, "dataset_name": "ajgt_twitter_ar"}}, "download_checksums": {"https://raw.githubusercontent.com/komari6/Arabic-twitter-corpus-AJGT/master/AJGT.xlsx": {"num_bytes": 107395, "checksum": "966c52213872b6b8a3ced5fb7c60aee2abf47ca673c7d2c2eeb064a60bc9ed51"}}, "download_size": 107395, "post_processing_size": null, "dataset_size": 175424, "size_in_bytes": 282819}}

datasets/allocine/allocine.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
import os
66

77
import datasets
8-
from datasets.tasks import TextClassification
98

109

1110
_CITATION = """\
@@ -66,7 +65,6 @@ def _info(self):
6665
supervised_keys=None,
6766
homepage="https://github.com/TheophileBlard/french-sentiment-analysis-with-bert",
6867
citation=_CITATION,
69-
task_templates=[TextClassification(labels=("neg", "pos"), text_column="review", label_column="label")],
7068
)
7169

7270
def _split_generators(self, dl_manager):
Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
{"allocine": {"description": " Allocine Dataset: A Large-Scale French Movie Reviews Dataset.\n This is a dataset for binary sentiment classification, made of user reviews scraped from Allocine.fr.\n It contains 100k positive and 100k negative reviews divided into 3 balanced splits: train (160k reviews), val (20k) and test (20k).\n", "citation": "@misc{blard2019allocine,\n author = {Blard, Theophile},\n title = {french-sentiment-analysis-with-bert},\n year = {2020},\n publisher = {GitHub},\n journal = {GitHub repository},\n howpublished={\\url{https://github.com/TheophileBlard/french-sentiment-analysis-with-bert}},\n}\n", "homepage": "https://github.com/TheophileBlard/french-sentiment-analysis-with-bert", "license": "", "features": {"review": {"dtype": "string", "id": null, "_type": "Value"}, "label": {"num_classes": 2, "names": ["neg", "pos"], "names_file": null, "id": null, "_type": "ClassLabel"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"labels": ["neg", "pos"], "text_column": "review", "label_column": "label"}], "builder_name": "allocine_dataset", "config_name": "allocine", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 91330696, "num_examples": 160000, "dataset_name": "allocine_dataset"}, "validation": {"name": "validation", "num_bytes": 11546250, "num_examples": 20000, "dataset_name": "allocine_dataset"}, "test": {"name": "test", "num_bytes": 11547697, "num_examples": 20000, "dataset_name": "allocine_dataset"}}, "download_checksums": {"https://github.com/TheophileBlard/french-sentiment-analysis-with-bert/raw/master/allocine_dataset/data.tar.bz2": {"num_bytes": 66625305, "checksum": "8c49a8cac783da201697ed1a91b36d2f6618222b3b7ea1c2996f2a3fbc37dfb4"}}, "download_size": 66625305, "post_processing_size": null, "dataset_size": 114424643, "size_in_bytes": 181049948}}
1+
{"allocine": {"description": " Allocine Dataset: A Large-Scale French Movie Reviews Dataset.\n This is a dataset for binary sentiment classification, made of user reviews scraped from Allocine.fr.\n It contains 100k positive and 100k negative reviews divided into 3 balanced splits: train (160k reviews), val (20k) and test (20k).\n", "citation": "@misc{blard2019allocine,\n author = {Blard, Theophile},\n title = {french-sentiment-analysis-with-bert},\n year = {2020},\n publisher = {GitHub},\n journal = {GitHub repository},\n howpublished={\\url{https://github.com/TheophileBlard/french-sentiment-analysis-with-bert}},\n}\n", "homepage": "https://github.com/TheophileBlard/french-sentiment-analysis-with-bert", "license": "", "features": {"review": {"dtype": "string", "id": null, "_type": "Value"}, "label": {"num_classes": 2, "names": ["neg", "pos"], "names_file": null, "id": null, "_type": "ClassLabel"}}, "supervised_keys": null, "builder_name": "allocine_dataset", "config_name": "allocine", "version": {"version_str": "1.0.0", "description": null, "datasets_version_to_prepare": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 91330696, "num_examples": 160000, "dataset_name": "allocine_dataset"}, "validation": {"name": "validation", "num_bytes": 11546250, "num_examples": 20000, "dataset_name": "allocine_dataset"}, "test": {"name": "test", "num_bytes": 11547697, "num_examples": 20000, "dataset_name": "allocine_dataset"}}, "download_checksums": {"https://github.com/TheophileBlard/french-sentiment-analysis-with-bert/raw/master/allocine_dataset/data.tar.bz2": {"num_bytes": 66625305, "checksum": "8c49a8cac783da201697ed1a91b36d2f6618222b3b7ea1c2996f2a3fbc37dfb4"}}, "download_size": 66625305, "dataset_size": 114424643, "size_in_bytes": 181049948}}

datasets/banking77/banking77.py

Lines changed: 0 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
import csv
1919

2020
import datasets
21-
from datasets.tasks import TextClassification
2221

2322

2423
_CITATION = """\
@@ -148,91 +147,6 @@ def _info(self):
148147
homepage=_HOMEPAGE,
149148
license=_LICENSE,
150149
citation=_CITATION,
151-
task_templates=[
152-
TextClassification(
153-
labels=(
154-
"Refund_not_showing_up",
155-
"activate_my_card",
156-
"age_limit",
157-
"apple_pay_or_google_pay",
158-
"atm_support",
159-
"automatic_top_up",
160-
"balance_not_updated_after_bank_transfer",
161-
"balance_not_updated_after_cheque_or_cash_deposit",
162-
"beneficiary_not_allowed",
163-
"cancel_transfer",
164-
"card_about_to_expire",
165-
"card_acceptance",
166-
"card_arrival",
167-
"card_delivery_estimate",
168-
"card_linking",
169-
"card_not_working",
170-
"card_payment_fee_charged",
171-
"card_payment_not_recognised",
172-
"card_payment_wrong_exchange_rate",
173-
"card_swallowed",
174-
"cash_withdrawal_charge",
175-
"cash_withdrawal_not_recognised",
176-
"change_pin",
177-
"compromised_card",
178-
"contactless_not_working",
179-
"country_support",
180-
"declined_card_payment",
181-
"declined_cash_withdrawal",
182-
"declined_transfer",
183-
"direct_debit_payment_not_recognised",
184-
"disposable_card_limits",
185-
"edit_personal_details",
186-
"exchange_charge",
187-
"exchange_rate",
188-
"exchange_via_app",
189-
"extra_charge_on_statement",
190-
"failed_transfer",
191-
"fiat_currency_support",
192-
"get_disposable_virtual_card",
193-
"get_physical_card",
194-
"getting_spare_card",
195-
"getting_virtual_card",
196-
"lost_or_stolen_card",
197-
"lost_or_stolen_phone",
198-
"order_physical_card",
199-
"passcode_forgotten",
200-
"pending_card_payment",
201-
"pending_cash_withdrawal",
202-
"pending_top_up",
203-
"pending_transfer",
204-
"pin_blocked",
205-
"receiving_money",
206-
"request_refund",
207-
"reverted_card_payment?",
208-
"supported_cards_and_currencies",
209-
"terminate_account",
210-
"top_up_by_bank_transfer_charge",
211-
"top_up_by_card_charge",
212-
"top_up_by_cash_or_cheque",
213-
"top_up_failed",
214-
"top_up_limits",
215-
"top_up_reverted",
216-
"topping_up_by_card",
217-
"transaction_charged_twice",
218-
"transfer_fee_charged",
219-
"transfer_into_account",
220-
"transfer_not_received_by_recipient",
221-
"transfer_timing",
222-
"unable_to_verify_identity",
223-
"verify_my_identity",
224-
"verify_source_of_funds",
225-
"verify_top_up",
226-
"virtual_card_not_working",
227-
"visa_or_mastercard",
228-
"why_verify_identity",
229-
"wrong_amount_of_cash_received",
230-
"wrong_exchange_rate_for_cash_withdrawal",
231-
),
232-
text_column="text",
233-
label_column="label",
234-
)
235-
],
236150
)
237151

238152
def _split_generators(self, dl_manager):

0 commit comments

Comments
 (0)