Skip to content

Commit 7784e8f

Browse files
committed
Start converting dots to underscores in dataset config names
1 parent 0724d70 commit 7784e8f

30 files changed

+594
-600
lines changed

datasets/banking77/README.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
---
22
annotations_creators:
33
- expert-generated
4-
extended:
5-
- original
64
language_creators:
75
- expert-generated
86
languages:

datasets/c4/README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,11 +70,11 @@ This is the version prepared by AllenAI, hosted at this address: https://hugging
7070
It comes in four variants:
7171

7272
- `en`: 305GB in JSON format
73-
- `en.noblocklist`: 380GB in JSON format
74-
- `en.noclean`: 2.3TB in JSON format
73+
- `en_noblocklist`: 380GB in JSON format
74+
- `en_noclean`: 2.3TB in JSON format
7575
- `realnewslike`: 15GB in JSON format
7676

77-
The `en.noblocklist` variant is exactly the same as the `en` variant, except we turned off the so-called "badwords filter", which removes all documents that contain words from the lists at https://github.com/LDNOOBW/List-of-Dirty-Naughty-Obscene-and-Otherwise-Bad-Words.
77+
The `en_noblocklist` variant is exactly the same as the `en` variant, except we turned off the so-called "badwords filter", which removes all documents that contain words from the lists at https://github.com/LDNOOBW/List-of-Dirty-Naughty-Obscene-and-Otherwise-Bad-Words.
7878

7979
### Supported Tasks and Leaderboards
8080

datasets/c4/c4.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,13 @@
3131

3232
_URL = "https://github.com/allenai/allennlp/discussions/5056"
3333

34-
_VARIANTS = ["en", "realnewslike", "en.noblocklist", "en.noclean"]
34+
_VARIANTS = ["en", "realnewslike", "en_noblocklist", "en_noclean"]
3535

3636
_N_SHARDS_PER_SPLIT = {
3737
"en": {"train": 1024, "validation": 8},
3838
"realnewslike": {"train": 512, "validation": 1},
39-
"en.noblocklist": {"train": 1024, "validation": 8},
40-
"en.noclean": {"train": 7168, "validation": 64},
39+
"en_noblocklist": {"train": 1024, "validation": 8},
40+
"en_noclean": {"train": 7168, "validation": 64},
4141
}
4242

4343
_DATA_URL = "https://huggingface.co/datasets/allenai/c4/resolve/1ddc917116b730e1859edef32896ec5c16be51d0/{name}/c4-{split}.{index:05d}-of-{n_shards:05d}.json.gz"
@@ -68,7 +68,7 @@ def _split_generators(self, dl_manager):
6868
for split in ["train", "validation"]:
6969
n_shards = _N_SHARDS_PER_SPLIT[self.config.name][split]
7070
data_urls[split] = [
71-
_DATA_URL.format(name=self.config.name, split=split, index=index, n_shards=n_shards)
71+
_DATA_URL.format(name=self.config.name.replace("_", "."), split=split, index=index, n_shards=n_shards)
7272
for index in range(n_shards)
7373
]
7474
train_downloaded_files = dl_manager.download(data_urls["train"])

datasets/c4/dataset_infos.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

datasets/cnn_dailymail/cnn_dailymail.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ class CnnDailymail(datasets.GeneratorBasedBuilder):
193193
"""CNN/DailyMail non-anonymized summarization dataset."""
194194

195195
BUILDER_CONFIGS = [
196-
CnnDailymailConfig(name=str(version), description="Plain text", version=version)
196+
CnnDailymailConfig(name=str(version).replace(".", "_"), description="Plain text", version=version)
197197
for version in _SUPPORTED_VERSIONS
198198
]
199199

datasets/cnn_dailymail/dataset_infos.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

datasets/common_gen/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ paperswithcode_id: commongen
55
pretty_name: CommonGen
66
task_ids:
77
- text2text-generation-other-concepts-to-text
8-
tasks_categories:
8+
task_categories:
99
- text2text-generation
1010
---
1111

datasets/cos_e/README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ inference in a novel Commonsense Auto-Generated Explanation (CAGE) framework.
5959

6060
### Data Instances
6161

62-
#### v1.0
62+
#### v1_0
6363

6464
- **Size of downloaded dataset files:** 4.10 MB
6565
- **Size of the generated dataset:** 2.23 MB
@@ -77,7 +77,7 @@ An example of 'train' looks as follows.
7777
}
7878
```
7979

80-
#### v1.11
80+
#### v1_11
8181

8282
- **Size of downloaded dataset files:** 6.23 MB
8383
- **Size of the generated dataset:** 2.91 MB
@@ -99,15 +99,15 @@ An example of 'train' looks as follows.
9999

100100
The data fields are the same among all splits.
101101

102-
#### v1.0
102+
#### v1_0
103103
- `id`: a `string` feature.
104104
- `question`: a `string` feature.
105105
- `choices`: a `list` of `string` features.
106106
- `answer`: a `string` feature.
107107
- `abstractive_explanation`: a `string` feature.
108108
- `extractive_explanation`: a `string` feature.
109109

110-
#### v1.11
110+
#### v1_11
111111
- `id`: a `string` feature.
112112
- `question`: a `string` feature.
113113
- `choices`: a `list` of `string` features.

datasets/cos_e/cos_e.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,9 @@ def _download_and_index_cqa(dl_manager, name):
5858

5959
downloaded_files = dl_manager.download_and_extract(
6060
{
61-
"cqa_train": _CQA_V1_11_URL_TRAIN if name == "v1.11" else _CQA_V1_0_URL_TRAIN,
62-
"cqa_dev": _CQA_V1_11_URL_DEV if name == "v1.11" else _CQA_V1_0_URL_DEV,
63-
"cqa_test": _CQA_V1_11_URL_TEST if name == "v1.11" else _CQA_V1_0_URL_TEST,
61+
"cqa_train": _CQA_V1_11_URL_TRAIN if name == "v1_11" else _CQA_V1_0_URL_TRAIN,
62+
"cqa_dev": _CQA_V1_11_URL_DEV if name == "v1_11" else _CQA_V1_0_URL_DEV,
63+
"cqa_test": _CQA_V1_11_URL_TEST if name == "v1_11" else _CQA_V1_0_URL_TEST,
6464
}
6565
)
6666

@@ -110,12 +110,12 @@ class CosE(datasets.GeneratorBasedBuilder):
110110

111111
BUILDER_CONFIGS = [
112112
CosEConfig(
113-
name="v1.0",
113+
name="v1_0",
114114
description="cos-e version 1.0",
115115
version=datasets.Version("1.0.0", ""),
116116
),
117117
CosEConfig(
118-
name="v1.11",
118+
name="v1_11",
119119
description="cos-e version 1.11",
120120
version=datasets.Version("1.11.0", ""),
121121
),
@@ -146,15 +146,15 @@ def _split_generators(self, dl_manager):
146146
# want to _create_ the Cos-E dataset from scratch.
147147
cqa_indexed = _download_and_index_cqa(dl_manager, self.config.name)
148148

149-
if self.config.name == "v1.11":
149+
if self.config.name == "v1_11":
150150
files = dl_manager.download_and_extract(
151151
{
152152
"dev": [_COS_E_URL + "v1.11/cose_dev_v1.11_processed.jsonl"],
153153
"train": [_COS_E_URL + "v1.11/cose_train_v1.11_processed.jsonl"],
154154
}
155155
)
156156

157-
elif self.config.name == "v1.0":
157+
elif self.config.name == "v1_0":
158158
files = dl_manager.download_and_extract(
159159
{
160160
"dev": [_COS_E_URL + "v1.0/cose_dev_v1.0_processed.jsonl"],

datasets/cos_e/dataset_infos.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"v1.0": {"description": "\nCommon Sense Explanations (CoS-E) allows for training language models to\nautomatically generate explanations that can be used during training and\ninference in a novel Commonsense Auto-Generated Explanation (CAGE) framework.\n", "citation": "\n@inproceedings{rajani2019explain,\n title = \"Explain Yourself! Leveraging Language models for Commonsense Reasoning\",\n author = \"Rajani, Nazneen Fatema and\n McCann, Bryan and\n Xiong, Caiming and\n Socher, Richard\",\n year=\"2019\",\n booktitle = \"Proceedings of the 2019 Conference of the Association for Computational Linguistics (ACL2019)\",\n url =\"https://arxiv.org/abs/1906.02361\"\n}\n", "homepage": "https://github.com/salesforce/cos-e", "license": "", "features": {"id": {"dtype": "string", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "choices": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "abstractive_explanation": {"dtype": "string", "id": null, "_type": "Value"}, "extractive_explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "supervised_keys": null, "builder_name": "cos_e", "config_name": "v1.0", "version": {"version_str": "1.0.0", "description": "", "datasets_version_to_prepare": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 2077517, "num_examples": 7610, "dataset_name": "cos_e"}, "validation": {"name": "validation", "num_bytes": 261887, "num_examples": 950, "dataset_name": "cos_e"}}, "download_checksums": {"https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.0/train_rand_split.jsonl": {"num_bytes": 2160200, "checksum": "1989ce97e24d8572113d6a18f44e0f11ee9d206fb9bf9a1133937645583e697e"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.0/dev_rand_split.jsonl": {"num_bytes": 268531, "checksum": 
"790dd2a8492e7f3b51ded04116de603115b7acaded32ea84f6a7101f9d571ac1"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.0/test_rand_split_no_answers.jsonl": {"num_bytes": 250752, "checksum": "b9c3d1319667ea1569be6f7b3ed0546bd8222d2f3a759f928307343a0282e190"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.0/cose_dev_v1.0_processed.jsonl": {"num_bytes": 182444, "checksum": "ab7b8ac91bca1a6ba798816af6aca703a739f576c919360ddc376d9d3046be53"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.0/cose_train_v1.0_processed.jsonl": {"num_bytes": 1433393, "checksum": "df9f83ac4891f38e0771470858d5f1c4b5bb08fee5c53f38f9df9b3d3675ea74"}}, "download_size": 4295320, "dataset_size": 2339404, "size_in_bytes": 6634724}, "v1.11": {"description": "\nCommon Sense Explanations (CoS-E) allows for training language models to\nautomatically generate explanations that can be used during training and\ninference in a novel Commonsense Auto-Generated Explanation (CAGE) framework.\n", "citation": "\n@inproceedings{rajani2019explain,\n title = \"Explain Yourself! 
Leveraging Language models for Commonsense Reasoning\",\n author = \"Rajani, Nazneen Fatema and\n McCann, Bryan and\n Xiong, Caiming and\n Socher, Richard\",\n year=\"2019\",\n booktitle = \"Proceedings of the 2019 Conference of the Association for Computational Linguistics (ACL2019)\",\n url =\"https://arxiv.org/abs/1906.02361\"\n}\n", "homepage": "https://github.com/salesforce/cos-e", "license": "", "features": {"id": {"dtype": "string", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "choices": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "abstractive_explanation": {"dtype": "string", "id": null, "_type": "Value"}, "extractive_explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "supervised_keys": null, "builder_name": "cos_e", "config_name": "v1.11", "version": {"version_str": "1.11.0", "description": "", "datasets_version_to_prepare": null, "major": 1, "minor": 11, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 2717420, "num_examples": 9741, "dataset_name": "cos_e"}, "validation": {"name": "validation", "num_bytes": 331760, "num_examples": 1221, "dataset_name": "cos_e"}}, "download_checksums": {"https://s3.amazonaws.com/commensenseqa/train_rand_split.jsonl": {"num_bytes": 3785890, "checksum": "58ffa3c8472410e24b8c43f423d89c8a003d8284698a6ed7874355dedd09a2fb"}, "https://s3.amazonaws.com/commensenseqa/dev_rand_split.jsonl": {"num_bytes": 471653, "checksum": "3210497fdaae614ac085d9eb873dd7f4d49b6f965a93adadc803e1229fd8a02a"}, "https://s3.amazonaws.com/commensenseqa/test_rand_split_no_answers.jsonl": {"num_bytes": 423148, "checksum": "b426896d71a9cd064cf01cfaf6e920817c51701ef66028883ac1af2e73ad5f29"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.11/cose_dev_v1.11_processed.jsonl": {"num_bytes": 200867, "checksum": 
"a8367c94901ba249e48bcec76eaff9e7b91cec0f0e4d94879975d7d1b952bc41"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.11/cose_train_v1.11_processed.jsonl": {"num_bytes": 1653976, "checksum": "4c0ccfd34243cf7af62b441643437769663edcb980b991487f766b97a547e9bd"}}, "download_size": 6535534, "dataset_size": 3049180, "size_in_bytes": 9584714}}
1+
{"v1_0": {"description": "\nCommon Sense Explanations (CoS-E) allows for training language models to\nautomatically generate explanations that can be used during training and\ninference in a novel Commonsense Auto-Generated Explanation (CAGE) framework.\n", "citation": "\n@inproceedings{rajani2019explain,\n title = \"Explain Yourself! Leveraging Language models for Commonsense Reasoning\",\n author = \"Rajani, Nazneen Fatema and\n McCann, Bryan and\n Xiong, Caiming and\n Socher, Richard\",\n year=\"2019\",\n booktitle = \"Proceedings of the 2019 Conference of the Association for Computational Linguistics (ACL2019)\",\n url =\"https://arxiv.org/abs/1906.02361\"\n}\n", "homepage": "https://github.com/salesforce/cos-e", "license": "", "features": {"id": {"dtype": "string", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "choices": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "abstractive_explanation": {"dtype": "string", "id": null, "_type": "Value"}, "extractive_explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "supervised_keys": null, "builder_name": "cos_e", "config_name": "v1_0", "version": {"version_str": "1.0.0", "description": "", "datasets_version_to_prepare": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 2077517, "num_examples": 7610, "dataset_name": "cos_e"}, "validation": {"name": "validation", "num_bytes": 261887, "num_examples": 950, "dataset_name": "cos_e"}}, "download_checksums": {"https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.0/train_rand_split.jsonl": {"num_bytes": 2160200, "checksum": "1989ce97e24d8572113d6a18f44e0f11ee9d206fb9bf9a1133937645583e697e"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.0/dev_rand_split.jsonl": {"num_bytes": 268531, "checksum": 
"790dd2a8492e7f3b51ded04116de603115b7acaded32ea84f6a7101f9d571ac1"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.0/test_rand_split_no_answers.jsonl": {"num_bytes": 250752, "checksum": "b9c3d1319667ea1569be6f7b3ed0546bd8222d2f3a759f928307343a0282e190"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.0/cose_dev_v1.0_processed.jsonl": {"num_bytes": 182444, "checksum": "ab7b8ac91bca1a6ba798816af6aca703a739f576c919360ddc376d9d3046be53"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.0/cose_train_v1.0_processed.jsonl": {"num_bytes": 1433393, "checksum": "df9f83ac4891f38e0771470858d5f1c4b5bb08fee5c53f38f9df9b3d3675ea74"}}, "download_size": 4295320, "dataset_size": 2339404, "size_in_bytes": 6634724}, "v1_11": {"description": "\nCommon Sense Explanations (CoS-E) allows for training language models to\nautomatically generate explanations that can be used during training and\ninference in a novel Commonsense Auto-Generated Explanation (CAGE) framework.\n", "citation": "\n@inproceedings{rajani2019explain,\n title = \"Explain Yourself! 
Leveraging Language models for Commonsense Reasoning\",\n author = \"Rajani, Nazneen Fatema and\n McCann, Bryan and\n Xiong, Caiming and\n Socher, Richard\",\n year=\"2019\",\n booktitle = \"Proceedings of the 2019 Conference of the Association for Computational Linguistics (ACL2019)\",\n url =\"https://arxiv.org/abs/1906.02361\"\n}\n", "homepage": "https://github.com/salesforce/cos-e", "license": "", "features": {"id": {"dtype": "string", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "choices": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "abstractive_explanation": {"dtype": "string", "id": null, "_type": "Value"}, "extractive_explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "supervised_keys": null, "builder_name": "cos_e", "config_name": "v1_11", "version": {"version_str": "1.11.0", "description": "", "datasets_version_to_prepare": null, "major": 1, "minor": 11, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 2717420, "num_examples": 9741, "dataset_name": "cos_e"}, "validation": {"name": "validation", "num_bytes": 331760, "num_examples": 1221, "dataset_name": "cos_e"}}, "download_checksums": {"https://s3.amazonaws.com/commensenseqa/train_rand_split.jsonl": {"num_bytes": 3785890, "checksum": "58ffa3c8472410e24b8c43f423d89c8a003d8284698a6ed7874355dedd09a2fb"}, "https://s3.amazonaws.com/commensenseqa/dev_rand_split.jsonl": {"num_bytes": 471653, "checksum": "3210497fdaae614ac085d9eb873dd7f4d49b6f965a93adadc803e1229fd8a02a"}, "https://s3.amazonaws.com/commensenseqa/test_rand_split_no_answers.jsonl": {"num_bytes": 423148, "checksum": "b426896d71a9cd064cf01cfaf6e920817c51701ef66028883ac1af2e73ad5f29"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.11/cose_dev_v1.11_processed.jsonl": {"num_bytes": 200867, "checksum": 
"a8367c94901ba249e48bcec76eaff9e7b91cec0f0e4d94879975d7d1b952bc41"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.11/cose_train_v1.11_processed.jsonl": {"num_bytes": 1653976, "checksum": "4c0ccfd34243cf7af62b441643437769663edcb980b991487f766b97a547e9bd"}}, "download_size": 6535534, "dataset_size": 3049180, "size_in_bytes": 9584714}}

0 commit comments

Comments
 (0)