huggingface · albertvillanova · May 26, 2022 · May 26, 2022
diff --git a/datasets/spider/dataset_infos.json b/datasets/spider/dataset_infos.json
@@ -1 +1 @@
-{"spider": {"description": "Spider is a large-scale complex and cross-domain semantic parsing and text-toSQL dataset annotated by 11 college students\n", "citation": "@article{yu2018spider,\n  title={Spider: A large-scale human-labeled dataset for complex and cross-domain semantic parsing and text-to-sql task},\n  author={Yu, Tao and Zhang, Rui and Yang, Kai and Yasunaga, Michihiro and Wang, Dongxu and Li, Zifan and Ma, James and Li, Irene and Yao, Qingning and Roman, Shanelle and others},\n  journal={arXiv preprint arXiv:1809.08887},\n  year={2018}\n}\n", "homepage": "https://yale-lily.github.io/spider", "license": "CC BY-SA 4.0", "features": {"db_id": {"dtype": "string", "id": null, "_type": "Value"}, "query": {"dtype": "string", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "query_toks": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "query_toks_no_value": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "question_toks": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "builder_name": "spider", "config_name": "spider", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 4743822, "num_examples": 7000, "dataset_name": "spider"}, "validation": {"name": "validation", "num_bytes": 682126, "num_examples": 1034, "dataset_name": "spider"}}, "download_checksums": {"https://drive.google.com/uc?export=download&id=1_AckYkinAnhqmRQtGsQgUKAnTHxxX5J0": {"num_bytes": 99736136, "checksum": "5ddff97bb1d421282c593e8d30ce0ce107270f4dd4a21d60eba4bf287d5956b1"}}, "download_size": 99736136, "post_processing_size": null, "dataset_size": 5425948, "size_in_bytes": 105162084}}
+{"spider": {"description": "Spider is a large-scale complex and cross-domain semantic parsing and text-toSQL dataset annotated by 11 college students\n", "citation": "@article{yu2018spider,\n  title={Spider: A large-scale human-labeled dataset for complex and cross-domain semantic parsing and text-to-sql task},\n  author={Yu, Tao and Zhang, Rui and Yang, Kai and Yasunaga, Michihiro and Wang, Dongxu and Li, Zifan and Ma, James and Li, Irene and Yao, Qingning and Roman, Shanelle and others},\n  journal={arXiv preprint arXiv:1809.08887},\n  year={2018}\n}\n", "homepage": "https://yale-lily.github.io/spider", "license": "CC BY-SA 4.0", "features": {"db_id": {"dtype": "string", "id": null, "_type": "Value"}, "query": {"dtype": "string", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "query_toks": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "query_toks_no_value": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "question_toks": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "spider", "config_name": "spider", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 4743786, "num_examples": 7000, "dataset_name": "spider"}, "validation": {"name": "validation", "num_bytes": 682090, "num_examples": 1034, "dataset_name": "spider"}}, "download_checksums": {"https://huggingface.co/datasets/spider/resolve/main/data/spider.zip": {"num_bytes": 99736136, "checksum": "5ddff97bb1d421282c593e8d30ce0ce107270f4dd4a21d60eba4bf287d5956b1"}}, "download_size": 99736136, "post_processing_size": null, "dataset_size": 5425876, "size_in_bytes": 105162012}}
diff --git a/datasets/spider/spider.py b/datasets/spider/spider.py
@@ -41,7 +41,7 @@
 
 _LICENSE = "CC BY-SA 4.0"
 
-_URL = "https://drive.google.com/uc?export=download&id=1_AckYkinAnhqmRQtGsQgUKAnTHxxX5J0"
+_URL = "https://huggingface.co/datasets/spider/resolve/main/data/spider.zip"
 
 
 class Spider(datasets.GeneratorBasedBuilder):
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		{"spider": {"description": "Spider is a large-scale complex and cross-domain semantic parsing and text-toSQL dataset annotated by 11 college students\n", "citation": "@article{yu2018spider,\n title={Spider: A large-scale human-labeled dataset for complex and cross-domain semantic parsing and text-to-sql task},\n author={Yu, Tao and Zhang, Rui and Yang, Kai and Yasunaga, Michihiro and Wang, Dongxu and Li, Zifan and Ma, James and Li, Irene and Yao, Qingning and Roman, Shanelle and others},\n journal={arXiv preprint arXiv:1809.08887},\n year={2018}\n}\n", "homepage": "https://yale-lily.github.io/spider", "license": "CC BY-SA 4.0", "features": {"db_id": {"dtype": "string", "id": null, "_type": "Value"}, "query": {"dtype": "string", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "query_toks": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "query_toks_no_value": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "question_toks": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "builder_name": "spider", "config_name": "spider", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 4743822, "num_examples": 7000, "dataset_name": "spider"}, "validation": {"name": "validation", "num_bytes": 682126, "num_examples": 1034, "dataset_name": "spider"}}, "download_checksums": {"https://drive.google.com/uc?export=download&id=1_AckYkinAnhqmRQtGsQgUKAnTHxxX5J0": {"num_bytes": 99736136, "checksum": "5ddff97bb1d421282c593e8d30ce0ce107270f4dd4a21d60eba4bf287d5956b1"}}, "download_size": 99736136, "post_processing_size": null, "dataset_size": 5425948, "size_in_bytes": 105162084}}
		{"spider": {"description": "Spider is a large-scale complex and cross-domain semantic parsing and text-toSQL dataset annotated by 11 college students\n", "citation": "@article{yu2018spider,\n title={Spider: A large-scale human-labeled dataset for complex and cross-domain semantic parsing and text-to-sql task},\n author={Yu, Tao and Zhang, Rui and Yang, Kai and Yasunaga, Michihiro and Wang, Dongxu and Li, Zifan and Ma, James and Li, Irene and Yao, Qingning and Roman, Shanelle and others},\n journal={arXiv preprint arXiv:1809.08887},\n year={2018}\n}\n", "homepage": "https://yale-lily.github.io/spider", "license": "CC BY-SA 4.0", "features": {"db_id": {"dtype": "string", "id": null, "_type": "Value"}, "query": {"dtype": "string", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "query_toks": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "query_toks_no_value": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "question_toks": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "spider", "config_name": "spider", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 4743786, "num_examples": 7000, "dataset_name": "spider"}, "validation": {"name": "validation", "num_bytes": 682090, "num_examples": 1034, "dataset_name": "spider"}}, "download_checksums": {"https://huggingface.co/datasets/spider/resolve/main/data/spider.zip": {"num_bytes": 99736136, "checksum": "5ddff97bb1d421282c593e8d30ce0ce107270f4dd4a21d60eba4bf287d5956b1"}}, "download_size": 99736136, "post_processing_size": null, "dataset_size": 5425876, "size_in_bytes": 105162012}}