diff --git a/datasets/adversarial_qa/adversarial_qa.py b/datasets/adversarial_qa/adversarial_qa.py index 80912b38edf..4a4ee2d3de2 100644 --- a/datasets/adversarial_qa/adversarial_qa.py +++ b/datasets/adversarial_qa/adversarial_qa.py @@ -169,13 +169,14 @@ def _generate_examples(self, filepath, split, model_in_the_loop): logger.info("generating examples from = %s", filepath) with open(filepath, encoding="utf-8") as f: squad = json.load(f) + id_ = 0 for article in squad["data"]: title = article.get("title", "").strip() for paragraph in article["paragraphs"]: context = paragraph["context"].strip() for qa in paragraph["qas"]: question = qa["question"].strip() - id_ = qa["id"] + qid = qa["id"] answer_starts = [answer["answer_start"] for answer in qa["answers"]] answers = [answer["text"].strip() for answer in qa["answers"]] @@ -188,10 +189,12 @@ def _generate_examples(self, filepath, split, model_in_the_loop): "title": title, "context": context, "question": question, - "id": id_, + "id": qid, "answers": { "answer_start": answer_starts, "text": answers, }, "metadata": {"split": split, "model_in_the_loop": model_in_the_loop}, } + + id_ += 1