+{"plain_text": {"description": "Stanford Question Answering Dataset (SQuAD) is a reading comprehension dataset, consisting of questions posed by crowdworkers on a set of Wikipedia articles, where the answer to every question is a segment of text, or span, from the corresponding reading passage, or the question might be unanswerable.\n", "citation": "@article{2016arXiv160605250R,\n author = {{Rajpurkar}, Pranav and {Zhang}, Jian and {Lopyrev},\n Konstantin and {Liang}, Percy},\n title = \"{SQuAD: 100,000+ Questions for Machine Comprehension of Text}\",\n journal = {arXiv e-prints},\n year = 2016,\n eid = {arXiv:1606.05250},\n pages = {arXiv:1606.05250},\narchivePrefix = {arXiv},\n eprint = {1606.05250},\n}\n", "homepage": "https://rajpurkar.github.io/SQuAD-explorer/", "license": "", "features": {"id": {"dtype": "string", "id": null, "_type": "Value"}, "title": {"dtype": "string", "id": null, "_type": "Value"}, "context": {"dtype": "string", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "answers": {"feature": {"text": {"dtype": "string", "id": null, "_type": "Value"}, "answer_start": {"dtype": "int32", "id": null, "_type": "Value"}}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "builder_name": "squad", "config_name": "plain_text", "version": {"version_str": "1.0.0", "description": "", "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 79426386, "num_examples": 87599, "dataset_name": "squad"}, "validation": {"name": "validation", "num_bytes": 10491883, "num_examples": 10570, "dataset_name": "squad"}}, "download_checksums": {"https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json": {"num_bytes": 30288272, "checksum": "3527663986b8295af4f7fcdff1ba1ff3f72d07d61a20f487cb238a6ef92fd955"}, "https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json": {"num_bytes": 4854279, "checksum": "95aa6a52d5d6a735563366753ca50492a658031da74f301ac5238b03966972c9"}}, "download_size": 35142551, "post_processing_size": null, "dataset_size": 89918269, "size_in_bytes": 125060820}, "qasper": {"description": "A dataset containing 1585 papers with 5049 information-seeking questions asked by regular readers of NLP papers, and answered by a separate set of NLP practitioners.\n", "citation": "@inproceedings{Dasigi2021ADO,\n title={A Dataset of Information-Seeking Questions and Answers Anchored in Research Papers},\n author={Pradeep Dasigi and Kyle Lo and Iz Beltagy and Arman Cohan and Noah A. Smith and Matt Gardner},\n year={2021}\n}\n", "homepage": "https://allenai.org/data/qasper", "license": "CC BY 4.0", "features": {"id": {"dtype": "string", "id": null, "_type": "Value"}, "title": {"dtype": "string", "id": null, "_type": "Value"}, "abstract": {"dtype": "string", "id": null, "_type": "Value"}, "full_text": {"feature": {"section_name": {"dtype": "string", "id": null, "_type": "Value"}, "paragraphs": [{"dtype": "string", "id": null, "_type": "Value"}]}, "length": -1, "id": null, "_type": "Sequence"}, "qas": {"feature": {"question": {"dtype": "string", "id": null, "_type": "Value"}, "question_id": {"dtype": "string", "id": null, "_type": "Value"}, "nlp_background": {"dtype": "string", "id": null, "_type": "Value"}, "topic_background": {"dtype": "string", "id": null, "_type": "Value"}, "paper_read": {"dtype": "string", "id": null, "_type": "Value"}, "search_query": {"dtype": "string", "id": null, "_type": "Value"}, "question_writer": {"dtype": "string", "id": null, "_type": "Value"}, "answers": {"feature": {"answer": {"unanswerable": {"dtype": "bool", "id": null, "_type": "Value"}, "extractive_spans": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "yes_no": {"dtype": "bool", "id": null, "_type": "Value"}, "free_form_answer": {"dtype": "string", "id": null, "_type": "Value"}, "evidence": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}, "highlighted_evidence": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "annotation_id": {"dtype": "string", "id": null, "_type": "Value"}, "worker_id": {"dtype": "string", "id": null, "_type": "Value"}}, "length": -1, "id": null, "_type": "Sequence"}}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "builder_name": "qasper", "config_name": "qasper", "version": {"version_str": "0.1.0", "description": null, "major": 0, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 27277970, "num_examples": 888, "dataset_name": "qasper"}, "validation": {"name": "validation", "num_bytes": 9535330, "num_examples": 281, "dataset_name": "qasper"}}, "download_checksums": {"https://qasper-dataset.s3-us-west-2.amazonaws.com/qasper-train-dev-v0.1.tgz": {"num_bytes": 10359737, "checksum": "cd0cb8911342966fcc3eb91947af149cb7cf80b4f253ff9a6f0333f4752080dd"}}, "download_size": 10359737, "post_processing_size": null, "dataset_size": 36813300, "size_in_bytes": 47173037}}
0 commit comments