-
Notifications
You must be signed in to change notification settings - Fork 29
Add xnli #134
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Add xnli #134
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,122 @@ | ||
| """ | ||
| XNLI is an evaluation corpus for language transfer and cross-lingual sentence classification in 15 languages. | ||
| https://arxiv.org/abs/1809.05053 | ||
| Homepage: None, Repo: https://github.com/facebookresearch/XNLI | ||
| """ | ||
| import typing | ||
|
|
||
| from lm_eval.api.task import PromptSourceTask | ||
|
|
||
|
|
||
| _CITATION = """ | ||
| @inproceedings{conneau2018xnli, | ||
| title={XNLI: Evaluating Cross-lingual Sentence Representations}, | ||
| author={Conneau, Alexis and Rinott, Ruty and Lample, Guillaume and Williams, Adina and Bowman, Samuel and Schwenk, Holger and Stoyanov, Veselin}, | ||
| booktitle={Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing}, | ||
| pages={2475--2485}, | ||
| year={2018} | ||
| } | ||
| }""" | ||
|
|
||
|
|
||
| class XNLI(PromptSourceTask): | ||
| VERSION = 1 | ||
| DATASET_PATH = "xnli" | ||
| DATASET_NAME = None | ||
|
|
||
| def has_training_docs(self): | ||
| return True | ||
|
|
||
| def has_validation_docs(self): | ||
| return True | ||
|
|
||
| def has_test_docs(self): | ||
| return True | ||
|
|
||
| def training_docs(self): | ||
| if self.has_training_docs(): | ||
| return self.dataset["train"] | ||
|
|
||
| def validation_docs(self): | ||
| if self.has_validation_docs(): | ||
| return self.dataset["validation"] | ||
|
|
||
|
|
||
|
Comment on lines
+37
to
+45
Collaborator
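There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Add a `test_docs` method [comment truncated in capture; `has_test_docs` returns True but the class defines no `test_docs`].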
||
| class XNLIEn(XNLI): | ||
| DATASET_NAME = "en" | ||
|
|
||
| class XNLIFr(XNLI): | ||
| DATASET_NAME = "fr" | ||
|
|
||
| class XNLIEs(XNLI): | ||
| DATASET_NAME = "es" | ||
|
|
||
| class XNLIDe(XNLI): | ||
| DATASET_NAME = "de" | ||
|
|
||
| class XNLIEl(XNLI): | ||
| DATASET_NAME = "el" | ||
|
|
||
| class XNLIBg(XNLI): | ||
| DATASET_NAME = "bg" | ||
|
|
||
| class XNLIRu(XNLI): | ||
| DATASET_NAME = "ru" | ||
|
|
||
| class XNLITr(XNLI): | ||
| DATASET_NAME = "tr" | ||
|
|
||
| class XNLIAr(XNLI): | ||
| DATASET_NAME = "ar" | ||
|
|
||
| class XNLIVi(XNLI): | ||
| DATASET_NAME = "vi" | ||
|
|
||
| class XNLITh(XNLI): | ||
| DATASET_NAME = "th" | ||
|
|
||
| class XNLIZh(XNLI): | ||
| DATASET_NAME = "zh" | ||
|
|
||
| class XNLIHi(XNLI): | ||
| DATASET_NAME = "hi" | ||
|
|
||
| class XNLISw(XNLI): | ||
| DATASET_NAME = "sw" | ||
|
|
||
| class XNLIUr(XNLI): | ||
| DATASET_NAME = "ur" | ||
|
Comment on lines
+49
to
+89
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Remove these classes. Unfortunately, English is currently the only language with `promptsource` support on the [remainder of comment truncated in capture].
||
|
|
||
|
|
||
| XNLI_TASKS = [ | ||
| XNLIEn, | ||
| XNLIFr, | ||
| XNLIEs, | ||
| XNLIDe, | ||
| XNLIEl, | ||
| XNLIBg, | ||
| XNLIRu, | ||
| XNLITr, | ||
| XNLIAr, | ||
| XNLIVi, | ||
| XNLITh, | ||
| XNLIZh, | ||
| XNLIHi, | ||
| XNLISw, | ||
| XNLIUr | ||
|
Comment on lines
+94
to
+107
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Remove these tasks (see comment above about lack of `promptsource` support for non-English tasks).
||
| ] | ||
|
|
||
|
|
||
| def construct_tasks() -> typing.Dict[str, XNLI]: | ||
| """ | ||
| Returns a dictionary of tasks keyed by task name, for example: | ||
| "GEM/wiki_lingua_ar" | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Change this key to an XNLI matching example, e.g. `xnli_en`.
||
| will dispatch to the GEM WikiLingua Arabic class. | ||
| """ | ||
| tasks = {} | ||
| for task_class in XNLI_TASKS: | ||
| benchmark = task_class.DATASET_PATH | ||
| lang = task_class.DATASET_NAME | ||
| tasks[f"{benchmark}_{lang}"] = task_class | ||
| return tasks | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Remove these tasks (see comment above about lack of
`promptsource` support for non-English tasks).