-
Notifications
You must be signed in to change notification settings - Fork 2.9k
add asdiv task #244
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
add asdiv task #244
Changes from all commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
b0a1231
add asdiv task
xagi-dev bce9f28
remove apps
xagi-dev 0463573
remove unrequired files&add pin commit hash
xagi-dev 72d7cc0
remove _strip_bracket function
xagi-dev 83e1a11
removed strip_bracket function
xagi-dev 4b3dee6
asdiv: space convention
leogao2 8dbd24f
Merge branch 'master' of github.com:EleutherAI/lm-evaluation-harness …
leogao2 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,118 @@ | ||
| """ | ||
| ASDiv: A Diverse Corpus for Evaluating and Developing English Math Word Problem Solvers | ||
| https://arxiv.org/abs/2106.15772 | ||
|
|
||
| @misc{miao2021diverse, | ||
| title={A Diverse Corpus for Evaluating and Developing English Math Word Problem Solvers}, | ||
| author={Shen-Yun Miao and Chao-Chun Liang and Keh-Yih Su}, | ||
| year={2021}, | ||
| eprint={2106.15772}, | ||
| archivePrefix={arXiv}, | ||
| primaryClass={cs.AI} | ||
| } | ||
| """ | ||
| from lm_eval.base import Task | ||
| from pathlib import Path | ||
| from best_download import download_file | ||
| import xml.etree.ElementTree as ET | ||
| from lm_eval.base import rf | ||
| from lm_eval.metrics import mean,perplexity | ||
| import numpy as np | ||
| from zipfile import ZipFile | ||
| import os | ||
|
|
||
| #currently ignoring formula for answer generation | ||
|
|
||
| # given a subset, splits return the docs | ||
class Asdiv(Task):
    """ASDiv math word problem task, scored by greedy exact match.

    Every problem consists of a passage (``body``), a ``question`` and an
    ``answer`` read from the ``ASDiv.xml`` file of a pinned commit of the
    dataset repository. Only a validation split is exposed and the task
    refuses any few-shot setting other than zero-shot.
    """

    VERSION = 0
    DATASET_PATH = Path("data/asdiv")

    # Location of the problem file inside DATASET_PATH once the pinned
    # archive has been extracted.
    _DATA_XML = "nlu-asdiv-dataset-55790e5270bb91ccfa5053194b25732534696b50/dataset/ASDiv.xml"

    def download(self):
        """Fetch and unpack the pinned ASDiv archive unless already present.

        The extracted ``ASDiv.xml`` (not merely the dataset directory) marks
        the data as available, so a download that was interrupted earlier is
        retried instead of being silently skipped.
        """
        if (self.DATASET_PATH / self._DATA_XML).exists():
            return
        # parents=True: works when "data/" does not exist yet.
        # exist_ok=True: tolerates a directory left by an earlier attempt.
        self.DATASET_PATH.mkdir(parents=True, exist_ok=True)
        url = "https://github.com/chaochun/nlu-asdiv-dataset/archive/55790e5270bb91ccfa5053194b25732534696b50.zip"
        checksum = "8f1fe4f6d5f170ec1e24ab78c244153c14c568b1bb2b1dad0324e71f37939a2d"
        zip_path = self.DATASET_PATH / "55790e5270bb91ccfa5053194b25732534696b50.zip"
        download_file(url, str(zip_path), checksum)
        with ZipFile(zip_path, "r") as archive:
            archive.extractall(self.DATASET_PATH)
        # The archive is no longer needed once its contents are on disk.
        os.remove(zip_path)

    def _convert_standard(self, problem):
        """Turn one ``Problem`` XML element into a plain dict.

        Returns a dict with the text of the ``Question``, ``Body`` and
        ``Answer`` child elements under the keys ``question``, ``body`` and
        ``answer``.
        """
        # TODO: include solution-type and formula
        out_doc = {
            "question": problem.find('Question').text,
            "body": problem.find('Body').text,
            "answer": problem.find('Answer').text,
        }
        return out_doc

    def load_docs(self, textfilename, tfds=False):
        """Lazily yield one document dict per ``Problem`` element.

        ``textfilename`` is the XML file to parse. ``tfds`` is not used by
        this method; it is kept so existing callers keep working.
        """
        tree = ET.parse(textfilename)
        root = tree.getroot()
        for problem in root.iter('Problem'):
            yield self._convert_standard(problem)

    def has_training_docs(self):
        """Return False: no training split is provided."""
        return False

    def has_validation_docs(self):
        """Return True: the problems are served as the validation split."""
        return True

    def has_test_docs(self):
        """Return False: no test split is provided."""
        return False

    def training_docs(self):
        """Always raise ``NotImplementedError``; there is no training split."""
        raise NotImplementedError("This dataset has no training docs")

    def test_docs(self):
        """Always raise ``NotImplementedError``; there is no test split."""
        raise NotImplementedError("This dataset has no test docs")

    def validation_docs(self):
        """Return a generator over the documents in the extracted XML file."""
        data_xml_path = self.DATASET_PATH / self._DATA_XML
        return self.load_docs(data_xml_path)

    def fewshot_context(self, doc, num_fewshot, provide_description, rnd):
        """Build the prompt context, allowing only the zero-shot setting.

        Delegates to the parent implementation after checking that
        ``num_fewshot`` is 0.
        """
        assert num_fewshot == 0, "ASDiv is intended only for the zero-shot setting."
        return super().fewshot_context(doc, num_fewshot, provide_description, rnd)

    def fewshot_description(self):
        """Return the fixed textual description of the prompt layout."""
        # TODO: add solution-type and formula
        desc = "information containing the context of the question\nQuestion: Text of a question.\nAnswer: Answer to the question, based on the passage.\n"
        return desc

    def doc_to_text(self, doc):
        """Format a document as passage, question line and open answer line."""
        # TODO: add solution-type
        return doc['body'] + '\n' + 'Question:' + doc['question'] + '\n' + 'Answer:'

    def doc_to_target(self, doc):
        """Return the expected completion: a space followed by the answer.

        Everything from the first ``" ("`` onwards is dropped from the raw
        answer text.
        """
        # TODO: add formula
        answer = doc['answer'].split(' (')[0]
        return " " + answer

    def construct_requests(self, doc, ctx):
        """Request the log-likelihood of the target given ``ctx``.

        The request is unpacked into its two parts, which are returned as a
        ``(ll, is_greedy)`` pair.
        """
        ll, is_greedy = rf.loglikelihood(ctx, self.doc_to_target(doc))
        return ll, is_greedy

    def process_results(self, doc, results):
        """Score one document: ``acc`` is 1 when ``is_greedy`` is truthy, else 0."""
        ll, is_greedy = results
        return {
            'acc': int(is_greedy)
        }

    def aggregation(self):
        """Map each metric name to its aggregation function (``acc`` -> mean)."""
        return {
            'acc': mean
        }

    def higher_is_better(self):
        """Map each metric name to whether larger values are better."""
        return {
            'acc': True
        }
|
|
||
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.