huggingface · lhoestq · May 11, 2022 · May 11, 2022 · May 13, 2022 · May 17, 2022
diff --git a/datasets/banking77/README.md b/datasets/banking77/README.md
@@ -1,8 +1,6 @@
 ---
 annotations_creators:
 - expert-generated
-extended:
-- original
 language_creators:
 - expert-generated
 languages:

diff --git a/datasets/c4/README.md b/datasets/c4/README.md
@@ -70,11 +70,11 @@ This is the version prepared by AllenAI, hosted at this address: https://hugging
 It comes in four variants:
 
 - `en`: 305GB in JSON format
-- `en.noblocklist`: 380GB in JSON format
-- `en.noclean`: 2.3TB in JSON format
+- `en_noblocklist`: 380GB in JSON format
+- `en_noclean`: 2.3TB in JSON format
 - `realnewslike`: 15GB in JSON format
 
-The `en.noblocklist` variant is exactly the same as the `en` variant, except we turned off the so-called "badwords filter", which removes all documents that contain words from the lists at https://github.com/LDNOOBW/List-of-Dirty-Naughty-Obscene-and-Otherwise-Bad-Words.
+The `en_noblocklist` variant is exactly the same as the `en` variant, except we turned off the so-called "badwords filter", which removes all documents that contain words from the lists at https://github.com/LDNOOBW/List-of-Dirty-Naughty-Obscene-and-Otherwise-Bad-Words.
 
 ### Supported Tasks and Leaderboards
 

diff --git a/datasets/c4/c4.py b/datasets/c4/c4.py
@@ -31,13 +31,13 @@
 
 _URL = "https://github.com/allenai/allennlp/discussions/5056"
 
-_VARIANTS = ["en", "realnewslike", "en.noblocklist", "en.noclean"]
+_VARIANTS = ["en", "realnewslike", "en_noblocklist", "en_noclean"]
 
 _N_SHARDS_PER_SPLIT = {
     "en": {"train": 1024, "validation": 8},
     "realnewslike": {"train": 512, "validation": 1},
-    "en.noblocklist": {"train": 1024, "validation": 8},
-    "en.noclean": {"train": 7168, "validation": 64},
+    "en_noblocklist": {"train": 1024, "validation": 8},
+    "en_noclean": {"train": 7168, "validation": 64},
 }
 
 _DATA_URL = "https://huggingface.co/datasets/allenai/c4/resolve/1ddc917116b730e1859edef32896ec5c16be51d0/{name}/c4-{split}.{index:05d}-of-{n_shards:05d}.json.gz"
@@ -68,7 +68,7 @@ def _split_generators(self, dl_manager):
         for split in ["train", "validation"]:
             n_shards = _N_SHARDS_PER_SPLIT[self.config.name][split]
             data_urls[split] = [
-                _DATA_URL.format(name=self.config.name, split=split, index=index, n_shards=n_shards)
+                _DATA_URL.format(name=self.config.name.replace("_", "."), split=split, index=index, n_shards=n_shards)
                 for index in range(n_shards)
             ]
         train_downloaded_files = dl_manager.download(data_urls["train"])

diff --git a/datasets/c4/dataset_infos.json b/datasets/c4/dataset_infos.json
diff --git a/datasets/cnn_dailymail/cnn_dailymail.py b/datasets/cnn_dailymail/cnn_dailymail.py
@@ -193,7 +193,7 @@ class CnnDailymail(datasets.GeneratorBasedBuilder):
     """CNN/DailyMail non-anonymized summarization dataset."""
 
     BUILDER_CONFIGS = [
-        CnnDailymailConfig(name=str(version), description="Plain text", version=version)
+        CnnDailymailConfig(name=str(version).replace(".", "_"), description="Plain text", version=version)
         for version in _SUPPORTED_VERSIONS
     ]
 

diff --git a/datasets/cnn_dailymail/dataset_infos.json b/datasets/cnn_dailymail/dataset_infos.json
diff --git a/...ailymail/dummy/1.0.0/1.0.0/dummy_data.zip → ...ailymail/dummy/1_0_0/1.0.0/dummy_data.zip b/...ailymail/dummy/1.0.0/1.0.0/dummy_data.zip → ...ailymail/dummy/1_0_0/1.0.0/dummy_data.zip
diff --git a/...ailymail/dummy/2.0.0/2.0.0/dummy_data.zip → ...ailymail/dummy/2_0_0/2.0.0/dummy_data.zip b/...ailymail/dummy/2.0.0/2.0.0/dummy_data.zip → ...ailymail/dummy/2_0_0/2.0.0/dummy_data.zip
diff --git a/...ailymail/dummy/3.0.0/3.0.0/dummy_data.zip → ...ailymail/dummy/3_0_0/3.0.0/dummy_data.zip b/...ailymail/dummy/3.0.0/3.0.0/dummy_data.zip → ...ailymail/dummy/3_0_0/3.0.0/dummy_data.zip
diff --git a/datasets/common_gen/README.md b/datasets/common_gen/README.md
@@ -5,7 +5,7 @@ paperswithcode_id: commongen
 pretty_name: CommonGen
 task_ids:
 - text2text-generation-other-concepts-to-text
-tasks_categories:
+task_categories:
 - text2text-generation
 ---
 

diff --git a/datasets/cos_e/README.md b/datasets/cos_e/README.md
@@ -59,7 +59,7 @@ inference in a novel Commonsense Auto-Generated Explanation (CAGE) framework.
 
 ### Data Instances
 
-#### v1.0
+#### v1_0
 
 - **Size of downloaded dataset files:** 4.10 MB
 - **Size of the generated dataset:** 2.23 MB
@@ -77,7 +77,7 @@ An example of 'train' looks as follows.
 }
 ```
 
-#### v1.11
+#### v1_11
 
 - **Size of downloaded dataset files:** 6.23 MB
 - **Size of the generated dataset:** 2.91 MB
@@ -99,15 +99,15 @@ An example of 'train' looks as follows.
 
 The data fields are the same among all splits.
 
-#### v1.0
+#### v1_0
 - `id`: a `string` feature.
 - `question`: a `string` feature.
 - `choices`: a `list` of `string` features.
 - `answer`: a `string` feature.
 - `abstractive_explanation`: a `string` feature.
 - `extractive_explanation`: a `string` feature.
 
-#### v1.11
+#### v1_11
 - `id`: a `string` feature.
 - `question`: a `string` feature.
 - `choices`: a `list` of `string` features.

diff --git a/datasets/cos_e/cos_e.py b/datasets/cos_e/cos_e.py
@@ -58,9 +58,9 @@ def _download_and_index_cqa(dl_manager, name):
 
     downloaded_files = dl_manager.download_and_extract(
         {
-            "cqa_train": _CQA_V1_11_URL_TRAIN if name == "v1.11" else _CQA_V1_0_URL_TRAIN,
-            "cqa_dev": _CQA_V1_11_URL_DEV if name == "v1.11" else _CQA_V1_0_URL_DEV,
-            "cqa_test": _CQA_V1_11_URL_TEST if name == "v1.11" else _CQA_V1_0_URL_TEST,
+            "cqa_train": _CQA_V1_11_URL_TRAIN if name == "v1_11" else _CQA_V1_0_URL_TRAIN,
+            "cqa_dev": _CQA_V1_11_URL_DEV if name == "v1_11" else _CQA_V1_0_URL_DEV,
+            "cqa_test": _CQA_V1_11_URL_TEST if name == "v1_11" else _CQA_V1_0_URL_TEST,
         }
     )
 
@@ -110,12 +110,12 @@ class CosE(datasets.GeneratorBasedBuilder):
 
     BUILDER_CONFIGS = [
         CosEConfig(
-            name="v1.0",
+            name="v1_0",
             description="cos-e version 1.0",
             version=datasets.Version("1.0.0", ""),
         ),
         CosEConfig(
-            name="v1.11",
+            name="v1_11",
             description="cos-e version 1.11",
             version=datasets.Version("1.11.0", ""),
         ),
@@ -146,15 +146,15 @@ def _split_generators(self, dl_manager):
         # want to _create_ the Cos-E dataset from scratch.
         cqa_indexed = _download_and_index_cqa(dl_manager, self.config.name)
 
-        if self.config.name == "v1.11":
+        if self.config.name == "v1_11":
             files = dl_manager.download_and_extract(
                 {
                     "dev": [_COS_E_URL + "v1.11/cose_dev_v1.11_processed.jsonl"],
                     "train": [_COS_E_URL + "v1.11/cose_train_v1.11_processed.jsonl"],
                 }
             )
 
-        elif self.config.name == "v1.0":
+        elif self.config.name == "v1_0":
             files = dl_manager.download_and_extract(
                 {
                     "dev": [_COS_E_URL + "v1.0/cose_dev_v1.0_processed.jsonl"],

diff --git a/datasets/cos_e/dataset_infos.json b/datasets/cos_e/dataset_infos.json
@@ -1 +1 @@
-{"v1.0": {"description": "\nCommon Sense Explanations (CoS-E) allows for training language models to\nautomatically generate explanations that can be used during training and\ninference in a novel Commonsense Auto-Generated Explanation (CAGE) framework.\n", "citation": "\n@inproceedings{rajani2019explain,\n     title = \"Explain Yourself! Leveraging Language models for Commonsense Reasoning\",\n    author = \"Rajani, Nazneen Fatema  and\n      McCann, Bryan  and\n      Xiong, Caiming  and\n      Socher, Richard\",\n      year=\"2019\",\n    booktitle = \"Proceedings of the 2019 Conference of the Association for Computational Linguistics (ACL2019)\",\n    url =\"https://arxiv.org/abs/1906.02361\"\n}\n", "homepage": "https://github.com/salesforce/cos-e", "license": "", "features": {"id": {"dtype": "string", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "choices": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "abstractive_explanation": {"dtype": "string", "id": null, "_type": "Value"}, "extractive_explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "supervised_keys": null, "builder_name": "cos_e", "config_name": "v1.0", "version": {"version_str": "1.0.0", "description": "", "datasets_version_to_prepare": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 2077517, "num_examples": 7610, "dataset_name": "cos_e"}, "validation": {"name": "validation", "num_bytes": 261887, "num_examples": 950, "dataset_name": "cos_e"}}, "download_checksums": {"https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.0/train_rand_split.jsonl": {"num_bytes": 2160200, "checksum": "1989ce97e24d8572113d6a18f44e0f11ee9d206fb9bf9a1133937645583e697e"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.0/dev_rand_split.jsonl": {"num_bytes": 268531, "checksum": "790dd2a8492e7f3b51ded04116de603115b7acaded32ea84f6a7101f9d571ac1"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.0/test_rand_split_no_answers.jsonl": {"num_bytes": 250752, "checksum": "b9c3d1319667ea1569be6f7b3ed0546bd8222d2f3a759f928307343a0282e190"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.0/cose_dev_v1.0_processed.jsonl": {"num_bytes": 182444, "checksum": "ab7b8ac91bca1a6ba798816af6aca703a739f576c919360ddc376d9d3046be53"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.0/cose_train_v1.0_processed.jsonl": {"num_bytes": 1433393, "checksum": "df9f83ac4891f38e0771470858d5f1c4b5bb08fee5c53f38f9df9b3d3675ea74"}}, "download_size": 4295320, "dataset_size": 2339404, "size_in_bytes": 6634724}, "v1.11": {"description": "\nCommon Sense Explanations (CoS-E) allows for training language models to\nautomatically generate explanations that can be used during training and\ninference in a novel Commonsense Auto-Generated Explanation (CAGE) framework.\n", "citation": "\n@inproceedings{rajani2019explain,\n     title = \"Explain Yourself! Leveraging Language models for Commonsense Reasoning\",\n    author = \"Rajani, Nazneen Fatema  and\n      McCann, Bryan  and\n      Xiong, Caiming  and\n      Socher, Richard\",\n      year=\"2019\",\n    booktitle = \"Proceedings of the 2019 Conference of the Association for Computational Linguistics (ACL2019)\",\n    url =\"https://arxiv.org/abs/1906.02361\"\n}\n", "homepage": "https://github.com/salesforce/cos-e", "license": "", "features": {"id": {"dtype": "string", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "choices": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "abstractive_explanation": {"dtype": "string", "id": null, "_type": "Value"}, "extractive_explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "supervised_keys": null, "builder_name": "cos_e", "config_name": "v1.11", "version": {"version_str": "1.11.0", "description": "", "datasets_version_to_prepare": null, "major": 1, "minor": 11, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 2717420, "num_examples": 9741, "dataset_name": "cos_e"}, "validation": {"name": "validation", "num_bytes": 331760, "num_examples": 1221, "dataset_name": "cos_e"}}, "download_checksums": {"https://s3.amazonaws.com/commensenseqa/train_rand_split.jsonl": {"num_bytes": 3785890, "checksum": "58ffa3c8472410e24b8c43f423d89c8a003d8284698a6ed7874355dedd09a2fb"}, "https://s3.amazonaws.com/commensenseqa/dev_rand_split.jsonl": {"num_bytes": 471653, "checksum": "3210497fdaae614ac085d9eb873dd7f4d49b6f965a93adadc803e1229fd8a02a"}, "https://s3.amazonaws.com/commensenseqa/test_rand_split_no_answers.jsonl": {"num_bytes": 423148, "checksum": "b426896d71a9cd064cf01cfaf6e920817c51701ef66028883ac1af2e73ad5f29"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.11/cose_dev_v1.11_processed.jsonl": {"num_bytes": 200867, "checksum": "a8367c94901ba249e48bcec76eaff9e7b91cec0f0e4d94879975d7d1b952bc41"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.11/cose_train_v1.11_processed.jsonl": {"num_bytes": 1653976, "checksum": "4c0ccfd34243cf7af62b441643437769663edcb980b991487f766b97a547e9bd"}}, "download_size": 6535534, "dataset_size": 3049180, "size_in_bytes": 9584714}}
+{"v1_0": {"description": "\nCommon Sense Explanations (CoS-E) allows for training language models to\nautomatically generate explanations that can be used during training and\ninference in a novel Commonsense Auto-Generated Explanation (CAGE) framework.\n", "citation": "\n@inproceedings{rajani2019explain,\n     title = \"Explain Yourself! Leveraging Language models for Commonsense Reasoning\",\n    author = \"Rajani, Nazneen Fatema  and\n      McCann, Bryan  and\n      Xiong, Caiming  and\n      Socher, Richard\",\n      year=\"2019\",\n    booktitle = \"Proceedings of the 2019 Conference of the Association for Computational Linguistics (ACL2019)\",\n    url =\"https://arxiv.org/abs/1906.02361\"\n}\n", "homepage": "https://github.com/salesforce/cos-e", "license": "", "features": {"id": {"dtype": "string", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "choices": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "abstractive_explanation": {"dtype": "string", "id": null, "_type": "Value"}, "extractive_explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "supervised_keys": null, "builder_name": "cos_e", "config_name": "v1_0", "version": {"version_str": "1.0.0", "description": "", "datasets_version_to_prepare": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 2077517, "num_examples": 7610, "dataset_name": "cos_e"}, "validation": {"name": "validation", "num_bytes": 261887, "num_examples": 950, "dataset_name": "cos_e"}}, "download_checksums": {"https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.0/train_rand_split.jsonl": {"num_bytes": 2160200, "checksum": "1989ce97e24d8572113d6a18f44e0f11ee9d206fb9bf9a1133937645583e697e"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.0/dev_rand_split.jsonl": {"num_bytes": 268531, "checksum": "790dd2a8492e7f3b51ded04116de603115b7acaded32ea84f6a7101f9d571ac1"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.0/test_rand_split_no_answers.jsonl": {"num_bytes": 250752, "checksum": "b9c3d1319667ea1569be6f7b3ed0546bd8222d2f3a759f928307343a0282e190"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.0/cose_dev_v1.0_processed.jsonl": {"num_bytes": 182444, "checksum": "ab7b8ac91bca1a6ba798816af6aca703a739f576c919360ddc376d9d3046be53"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.0/cose_train_v1.0_processed.jsonl": {"num_bytes": 1433393, "checksum": "df9f83ac4891f38e0771470858d5f1c4b5bb08fee5c53f38f9df9b3d3675ea74"}}, "download_size": 4295320, "dataset_size": 2339404, "size_in_bytes": 6634724}, "v1_11": {"description": "\nCommon Sense Explanations (CoS-E) allows for training language models to\nautomatically generate explanations that can be used during training and\ninference in a novel Commonsense Auto-Generated Explanation (CAGE) framework.\n", "citation": "\n@inproceedings{rajani2019explain,\n     title = \"Explain Yourself! Leveraging Language models for Commonsense Reasoning\",\n    author = \"Rajani, Nazneen Fatema  and\n      McCann, Bryan  and\n      Xiong, Caiming  and\n      Socher, Richard\",\n      year=\"2019\",\n    booktitle = \"Proceedings of the 2019 Conference of the Association for Computational Linguistics (ACL2019)\",\n    url =\"https://arxiv.org/abs/1906.02361\"\n}\n", "homepage": "https://github.com/salesforce/cos-e", "license": "", "features": {"id": {"dtype": "string", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "choices": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "abstractive_explanation": {"dtype": "string", "id": null, "_type": "Value"}, "extractive_explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "supervised_keys": null, "builder_name": "cos_e", "config_name": "v1_11", "version": {"version_str": "1.11.0", "description": "", "datasets_version_to_prepare": null, "major": 1, "minor": 11, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 2717420, "num_examples": 9741, "dataset_name": "cos_e"}, "validation": {"name": "validation", "num_bytes": 331760, "num_examples": 1221, "dataset_name": "cos_e"}}, "download_checksums": {"https://s3.amazonaws.com/commensenseqa/train_rand_split.jsonl": {"num_bytes": 3785890, "checksum": "58ffa3c8472410e24b8c43f423d89c8a003d8284698a6ed7874355dedd09a2fb"}, "https://s3.amazonaws.com/commensenseqa/dev_rand_split.jsonl": {"num_bytes": 471653, "checksum": "3210497fdaae614ac085d9eb873dd7f4d49b6f965a93adadc803e1229fd8a02a"}, "https://s3.amazonaws.com/commensenseqa/test_rand_split_no_answers.jsonl": {"num_bytes": 423148, "checksum": "b426896d71a9cd064cf01cfaf6e920817c51701ef66028883ac1af2e73ad5f29"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.11/cose_dev_v1.11_processed.jsonl": {"num_bytes": 200867, "checksum": "a8367c94901ba249e48bcec76eaff9e7b91cec0f0e4d94879975d7d1b952bc41"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.11/cose_train_v1.11_processed.jsonl": {"num_bytes": 1653976, "checksum": "4c0ccfd34243cf7af62b441643437769663edcb980b991487f766b97a547e9bd"}}, "download_size": 6535534, "dataset_size": 3049180, "size_in_bytes": 9584714}}
diff --git a/...ets/cos_e/dummy/v1.0/1.0.0/dummy_data.zip → ...ets/cos_e/dummy/v1_0/1.0.0/dummy_data.zip b/...ets/cos_e/dummy/v1.0/1.0.0/dummy_data.zip → ...ets/cos_e/dummy/v1_0/1.0.0/dummy_data.zip
diff --git a/...s/cos_e/dummy/v1.11/1.11.0/dummy_data.zip → ...s/cos_e/dummy/v1_11/1.11.0/dummy_data.zip b/...s/cos_e/dummy/v1.11/1.11.0/dummy_data.zip → ...s/cos_e/dummy/v1_11/1.11.0/dummy_data.zip
diff --git a/datasets/evidence_infer_treatment/README.md b/datasets/evidence_infer_treatment/README.md
@@ -1275,8 +1275,8 @@ We have recently collected additional data for this task (https://arxiv.org/abs/
 
 | name | train | validation | test |
 |------|------:|-----------:|-----:|
-| 1.1  |  1931 |        248 |  240 |
-| 2.0  |  2690 |        340 |  334 |
+| 1_1  |  1931 |        248 |  240 |
+| 2_0  |  2690 |        340 |  334 |
 
 ## Dataset Creation
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		{"v1.0": {"description": "\nCommon Sense Explanations (CoS-E) allows for training language models to\nautomatically generate explanations that can be used during training and\ninference in a novel Commonsense Auto-Generated Explanation (CAGE) framework.\n", "citation": "\n@inproceedings{rajani2019explain,\n title = \"Explain Yourself! Leveraging Language models for Commonsense Reasoning\",\n author = \"Rajani, Nazneen Fatema and\n McCann, Bryan and\n Xiong, Caiming and\n Socher, Richard\",\n year=\"2019\",\n booktitle = \"Proceedings of the 2019 Conference of the Association for Computational Linguistics (ACL2019)\",\n url =\"https://arxiv.org/abs/1906.02361\"\n}\n", "homepage": "https://github.com/salesforce/cos-e", "license": "", "features": {"id": {"dtype": "string", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "choices": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "abstractive_explanation": {"dtype": "string", "id": null, "_type": "Value"}, "extractive_explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "supervised_keys": null, "builder_name": "cos_e", "config_name": "v1.0", "version": {"version_str": "1.0.0", "description": "", "datasets_version_to_prepare": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 2077517, "num_examples": 7610, "dataset_name": "cos_e"}, "validation": {"name": "validation", "num_bytes": 261887, "num_examples": 950, "dataset_name": "cos_e"}}, "download_checksums": {"https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.0/train_rand_split.jsonl": {"num_bytes": 2160200, "checksum": "1989ce97e24d8572113d6a18f44e0f11ee9d206fb9bf9a1133937645583e697e"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.0/dev_rand_split.jsonl": {"num_bytes": 268531, "checksum": "790dd2a8492e7f3b51ded04116de603115b7acaded32ea84f6a7101f9d571ac1"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.0/test_rand_split_no_answers.jsonl": {"num_bytes": 250752, "checksum": "b9c3d1319667ea1569be6f7b3ed0546bd8222d2f3a759f928307343a0282e190"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.0/cose_dev_v1.0_processed.jsonl": {"num_bytes": 182444, "checksum": "ab7b8ac91bca1a6ba798816af6aca703a739f576c919360ddc376d9d3046be53"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.0/cose_train_v1.0_processed.jsonl": {"num_bytes": 1433393, "checksum": "df9f83ac4891f38e0771470858d5f1c4b5bb08fee5c53f38f9df9b3d3675ea74"}}, "download_size": 4295320, "dataset_size": 2339404, "size_in_bytes": 6634724}, "v1.11": {"description": "\nCommon Sense Explanations (CoS-E) allows for training language models to\nautomatically generate explanations that can be used during training and\ninference in a novel Commonsense Auto-Generated Explanation (CAGE) framework.\n", "citation": "\n@inproceedings{rajani2019explain,\n title = \"Explain Yourself! Leveraging Language models for Commonsense Reasoning\",\n author = \"Rajani, Nazneen Fatema and\n McCann, Bryan and\n Xiong, Caiming and\n Socher, Richard\",\n year=\"2019\",\n booktitle = \"Proceedings of the 2019 Conference of the Association for Computational Linguistics (ACL2019)\",\n url =\"https://arxiv.org/abs/1906.02361\"\n}\n", "homepage": "https://github.com/salesforce/cos-e", "license": "", "features": {"id": {"dtype": "string", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "choices": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "abstractive_explanation": {"dtype": "string", "id": null, "_type": "Value"}, "extractive_explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "supervised_keys": null, "builder_name": "cos_e", "config_name": "v1.11", "version": {"version_str": "1.11.0", "description": "", "datasets_version_to_prepare": null, "major": 1, "minor": 11, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 2717420, "num_examples": 9741, "dataset_name": "cos_e"}, "validation": {"name": "validation", "num_bytes": 331760, "num_examples": 1221, "dataset_name": "cos_e"}}, "download_checksums": {"https://s3.amazonaws.com/commensenseqa/train_rand_split.jsonl": {"num_bytes": 3785890, "checksum": "58ffa3c8472410e24b8c43f423d89c8a003d8284698a6ed7874355dedd09a2fb"}, "https://s3.amazonaws.com/commensenseqa/dev_rand_split.jsonl": {"num_bytes": 471653, "checksum": "3210497fdaae614ac085d9eb873dd7f4d49b6f965a93adadc803e1229fd8a02a"}, "https://s3.amazonaws.com/commensenseqa/test_rand_split_no_answers.jsonl": {"num_bytes": 423148, "checksum": "b426896d71a9cd064cf01cfaf6e920817c51701ef66028883ac1af2e73ad5f29"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.11/cose_dev_v1.11_processed.jsonl": {"num_bytes": 200867, "checksum": "a8367c94901ba249e48bcec76eaff9e7b91cec0f0e4d94879975d7d1b952bc41"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.11/cose_train_v1.11_processed.jsonl": {"num_bytes": 1653976, "checksum": "4c0ccfd34243cf7af62b441643437769663edcb980b991487f766b97a547e9bd"}}, "download_size": 6535534, "dataset_size": 3049180, "size_in_bytes": 9584714}}
		{"v1_0": {"description": "\nCommon Sense Explanations (CoS-E) allows for training language models to\nautomatically generate explanations that can be used during training and\ninference in a novel Commonsense Auto-Generated Explanation (CAGE) framework.\n", "citation": "\n@inproceedings{rajani2019explain,\n title = \"Explain Yourself! Leveraging Language models for Commonsense Reasoning\",\n author = \"Rajani, Nazneen Fatema and\n McCann, Bryan and\n Xiong, Caiming and\n Socher, Richard\",\n year=\"2019\",\n booktitle = \"Proceedings of the 2019 Conference of the Association for Computational Linguistics (ACL2019)\",\n url =\"https://arxiv.org/abs/1906.02361\"\n}\n", "homepage": "https://github.com/salesforce/cos-e", "license": "", "features": {"id": {"dtype": "string", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "choices": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "abstractive_explanation": {"dtype": "string", "id": null, "_type": "Value"}, "extractive_explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "supervised_keys": null, "builder_name": "cos_e", "config_name": "v1_0", "version": {"version_str": "1.0.0", "description": "", "datasets_version_to_prepare": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 2077517, "num_examples": 7610, "dataset_name": "cos_e"}, "validation": {"name": "validation", "num_bytes": 261887, "num_examples": 950, "dataset_name": "cos_e"}}, "download_checksums": {"https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.0/train_rand_split.jsonl": {"num_bytes": 2160200, "checksum": "1989ce97e24d8572113d6a18f44e0f11ee9d206fb9bf9a1133937645583e697e"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.0/dev_rand_split.jsonl": {"num_bytes": 268531, "checksum": "790dd2a8492e7f3b51ded04116de603115b7acaded32ea84f6a7101f9d571ac1"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.0/test_rand_split_no_answers.jsonl": {"num_bytes": 250752, "checksum": "b9c3d1319667ea1569be6f7b3ed0546bd8222d2f3a759f928307343a0282e190"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.0/cose_dev_v1.0_processed.jsonl": {"num_bytes": 182444, "checksum": "ab7b8ac91bca1a6ba798816af6aca703a739f576c919360ddc376d9d3046be53"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.0/cose_train_v1.0_processed.jsonl": {"num_bytes": 1433393, "checksum": "df9f83ac4891f38e0771470858d5f1c4b5bb08fee5c53f38f9df9b3d3675ea74"}}, "download_size": 4295320, "dataset_size": 2339404, "size_in_bytes": 6634724}, "v1_11": {"description": "\nCommon Sense Explanations (CoS-E) allows for training language models to\nautomatically generate explanations that can be used during training and\ninference in a novel Commonsense Auto-Generated Explanation (CAGE) framework.\n", "citation": "\n@inproceedings{rajani2019explain,\n title = \"Explain Yourself! Leveraging Language models for Commonsense Reasoning\",\n author = \"Rajani, Nazneen Fatema and\n McCann, Bryan and\n Xiong, Caiming and\n Socher, Richard\",\n year=\"2019\",\n booktitle = \"Proceedings of the 2019 Conference of the Association for Computational Linguistics (ACL2019)\",\n url =\"https://arxiv.org/abs/1906.02361\"\n}\n", "homepage": "https://github.com/salesforce/cos-e", "license": "", "features": {"id": {"dtype": "string", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "choices": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "abstractive_explanation": {"dtype": "string", "id": null, "_type": "Value"}, "extractive_explanation": {"dtype": "string", "id": null, "_type": "Value"}}, "supervised_keys": null, "builder_name": "cos_e", "config_name": "v1_11", "version": {"version_str": "1.11.0", "description": "", "datasets_version_to_prepare": null, "major": 1, "minor": 11, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 2717420, "num_examples": 9741, "dataset_name": "cos_e"}, "validation": {"name": "validation", "num_bytes": 331760, "num_examples": 1221, "dataset_name": "cos_e"}}, "download_checksums": {"https://s3.amazonaws.com/commensenseqa/train_rand_split.jsonl": {"num_bytes": 3785890, "checksum": "58ffa3c8472410e24b8c43f423d89c8a003d8284698a6ed7874355dedd09a2fb"}, "https://s3.amazonaws.com/commensenseqa/dev_rand_split.jsonl": {"num_bytes": 471653, "checksum": "3210497fdaae614ac085d9eb873dd7f4d49b6f965a93adadc803e1229fd8a02a"}, "https://s3.amazonaws.com/commensenseqa/test_rand_split_no_answers.jsonl": {"num_bytes": 423148, "checksum": "b426896d71a9cd064cf01cfaf6e920817c51701ef66028883ac1af2e73ad5f29"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.11/cose_dev_v1.11_processed.jsonl": {"num_bytes": 200867, "checksum": "a8367c94901ba249e48bcec76eaff9e7b91cec0f0e4d94879975d7d1b952bc41"}, "https://raw.githubusercontent.com/salesforce/cos-e/master/data/v1.11/cose_train_v1.11_processed.jsonl": {"num_bytes": 1653976, "checksum": "4c0ccfd34243cf7af62b441643437769663edcb980b991487f766b97a547e9bd"}}, "download_size": 6535534, "dataset_size": 3049180, "size_in_bytes": 9584714}}