From c3118f024c79f1494e7a1cb864a7df884792cfae Mon Sep 17 00:00:00 2001 From: haileyschoelkopf Date: Tue, 5 Jul 2022 12:30:20 -0400 Subject: [PATCH 1/5] remove promptsource dependency --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 51f7566a2f..8ca5f827e4 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,8 @@ "tqdm-multiprocess==0.0.11", "accelerate@git+https://github.com/huggingface/accelerate@main", "transformers@git+https://github.com/huggingface/transformers@main", - "promptsource@git+https://github.com/bigscience-workshop/promptsource@eval-hackathon", + #"promptsource@git+https://github.com/bigscience-workshop/promptsource@eval-hackathon", + # install promptsource manually to ensure it's up-to-date with the correct branch ] dependency_links = [] From fff00b57c6b847a644c5d4a3bd593925a5c94911 Mon Sep 17 00:00:00 2001 From: haileyschoelkopf Date: Sat, 16 Jul 2022 12:08:38 -0400 Subject: [PATCH 2/5] add xcopa task --- lm_eval/tasks/__init__.py | 5 ++- lm_eval/tasks/xcopa.py | 85 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 1 deletion(-) create mode 100644 lm_eval/tasks/xcopa.py diff --git a/lm_eval/tasks/__init__.py b/lm_eval/tasks/__init__.py index 0084a3f7e6..beb2dd4fe6 100644 --- a/lm_eval/tasks/__init__.py +++ b/lm_eval/tasks/__init__.py @@ -33,6 +33,7 @@ from . import tydiqa from . import wino_bias from . import wmt +from . import xcopa from . 
import xquad @@ -79,7 +80,7 @@ "hans": hans.HANS, # CNN Daily Mail "cnn_dailymail": cnn_dailymail.CnnDailyMail, - # GEM/xum + # GEM/xsum "gem_xsum": gem_xsum.GEMXSUM, "gem_xsum_challenge_sample": gem_xsum.GEMXSUMChallgeSample, "gem_xsum_challenge_test_backtranslation": gem_xsum.GEMXSUMChallgeTestBacktranslation, @@ -198,6 +199,8 @@ # TyDi QA "tydiqa_primary": tydiqa.TyDiQAPrimaryClassification, "tydiqa_secondary": tydiqa.TyDiQAGoldPGeneration, + # XCOPA + **xcopa.construct_tasks(), ####################################################### # TODO: Not Yet Available in `promptsource/eval-hackathon` ######################################################## diff --git a/lm_eval/tasks/xcopa.py b/lm_eval/tasks/xcopa.py new file mode 100644 index 0000000000..055d235079 --- /dev/null +++ b/lm_eval/tasks/xcopa.py @@ -0,0 +1,85 @@ +""" + +Homepage: +""" +import typing + +from lm_eval.api.task import PromptSourceTask + + +_CITATION = """ +TODO: add +""" + +class XCopaBase(PromptSourceTask): + VERSION = 0 + DATASET_PATH = "xcopa" + DATASET_NAME = None + + def has_training_docs(self): + return False + + def has_validation_docs(self): + return True + + def has_test_docs(self): + return True + + def validation_docs(self): + return self.dataset["validation"] + + def test_docs(self): + return self.dataset["test"] + + def invalid_doc_for_prompt(self, doc) -> bool: + # HACK: Some copa templates have conditionals that ignore documents + # when the condition is not met, like `{if doc['question'] != \"cause\"}`. + # This means the prompt will never produce an input and target. 
+ try: + result = self.prompt_template.apply(doc) + if result == ['']: + return True + else: + return False + except Exception: + return True + +class XCopaId(XCopaBase): + DATASET_NAME = "id" + +class XCopaIt(XCopaBase): + DATASET_NAME = "it" + +class XCopaSw(XCopaBase): + DATASET_NAME = "sw" + +class XCopaTa(XCopaBase): + DATASET_NAME = "ta" + +class XCopaVi(XCopaBase): + DATASET_NAME = "vi" + +class XCopaZh(XCopaBase): + DATASET_NAME = "zh" + +XCOPA_TASKS = [ + XCopaId, + XCopaIt, + XCopaSw, + XCopaTa, + XCopaVi, + XCopaZh, +] + +def construct_tasks() -> typing.Dict[str, XCopaBase]: + """ + Returns a dictionary of tasks keyed by task name, for example: + "xcopa/id": XCopaId + will dispatch to the GEM WikiLingua Arabic class. + """ + tasks = {} + for task_class in XCOPA_TASKS: + benchmark = task_class.DATASET_PATH + lang = task_class.DATASET_NAME + tasks[f"{benchmark}_{lang}"] = task_class + return tasks \ No newline at end of file From dcb0cbfa5824d8438e26e6ea2b5a95c45bceca2f Mon Sep 17 00:00:00 2001 From: Hailey Schoelkopf <65563625+haileyschoelkopf@users.noreply.github.com> Date: Sat, 16 Jul 2022 17:04:05 -0400 Subject: [PATCH 3/5] Update lm_eval/tasks/xcopa.py Co-authored-by: Niklas Muennighoff --- lm_eval/tasks/xcopa.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lm_eval/tasks/xcopa.py b/lm_eval/tasks/xcopa.py index 055d235079..1becb14fa8 100644 --- a/lm_eval/tasks/xcopa.py +++ b/lm_eval/tasks/xcopa.py @@ -8,7 +8,13 @@ _CITATION = """ -TODO: add +@article{ponti2020xcopa, + title={{XCOPA: A} Multilingual Dataset for Causal Commonsense Reasoning}, + author={Edoardo M. 
Ponti, Goran Glavaš, Olga Majewska, Qianchu Liu, Ivan Vulić and Anna Korhonen}, + journal={arXiv preprint}, + year={2020}, + url={https://ducdauge.github.io/files/xcopa.pdf} +} """ class XCopaBase(PromptSourceTask): From 59784391b188bd817589504239f10f2c997989fb Mon Sep 17 00:00:00 2001 From: Hailey Schoelkopf <65563625+haileyschoelkopf@users.noreply.github.com> Date: Sat, 16 Jul 2022 17:04:29 -0400 Subject: [PATCH 4/5] Update lm_eval/tasks/xcopa.py Co-authored-by: Niklas Muennighoff --- lm_eval/tasks/xcopa.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lm_eval/tasks/xcopa.py b/lm_eval/tasks/xcopa.py index 1becb14fa8..aa81ad1b6a 100644 --- a/lm_eval/tasks/xcopa.py +++ b/lm_eval/tasks/xcopa.py @@ -88,4 +88,4 @@ def construct_tasks() -> typing.Dict[str, XCopaBase]: benchmark = task_class.DATASET_PATH lang = task_class.DATASET_NAME tasks[f"{benchmark}_{lang}"] = task_class - return tasks \ No newline at end of file + return tasks From 2c4121bdc09e105d110fbe130c992fe894e21e3f Mon Sep 17 00:00:00 2001 From: jon-tow Date: Thu, 28 Jul 2022 01:41:15 -0400 Subject: [PATCH 5/5] Address requested changes and format --- lm_eval/tasks/__init__.py | 1 + lm_eval/tasks/xcopa.py | 31 +++++++++++++++++++------------ setup.py | 3 +-- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/lm_eval/tasks/__init__.py b/lm_eval/tasks/__init__.py index beb2dd4fe6..31e49903cd 100644 --- a/lm_eval/tasks/__init__.py +++ b/lm_eval/tasks/__init__.py @@ -200,6 +200,7 @@ "tydiqa_primary": tydiqa.TyDiQAPrimaryClassification, "tydiqa_secondary": tydiqa.TyDiQAGoldPGeneration, # XCOPA + # Format: `xcopa_{lang}` **xcopa.construct_tasks(), ####################################################### # TODO: Not Yet Available in `promptsource/eval-hackathon` ######################################################## diff --git a/lm_eval/tasks/xcopa.py b/lm_eval/tasks/xcopa.py index aa81ad1b6a..017b924956 100644 --- a/lm_eval/tasks/xcopa.py +++ b/lm_eval/tasks/xcopa.py @@ -1,6 +1,11 @@ """ +XCOPA: A Multilingual Dataset for Causal 
Commonsense Reasoning +https://arxiv.org/pdf/2005.00333v1.pdf -Homepage: +Cross-lingual Choice of Plausible Alternatives (XCOPA) is a typologically diverse +multilingual dataset for causal commonsense reasoning in 11 languages. + +Homepage: https://github.com/cambridgeltl/xcopa """ import typing @@ -17,6 +22,7 @@ } """ + class XCopaBase(PromptSourceTask): VERSION = 0 DATASET_PATH = "xcopa" @@ -38,36 +44,40 @@ def test_docs(self): return self.dataset["test"] def invalid_doc_for_prompt(self, doc) -> bool: - # HACK: Some copa templates have conditionals that ignore documents + # HACK: Some XCOPA templates have conditionals that ignore documents # when the condition is not met, like `{if doc['question'] != \"cause\"}`. # This means the prompt will never produce an input and target. try: - result = self.prompt_template.apply(doc) - if result == ['']: - return True - else: - return False + text, target = self.prompt_template.apply(doc) + return False except Exception: return True + class XCopaId(XCopaBase): DATASET_NAME = "id" + class XCopaIt(XCopaBase): DATASET_NAME = "it" + class XCopaSw(XCopaBase): DATASET_NAME = "sw" + class XCopaTa(XCopaBase): DATASET_NAME = "ta" + class XCopaVi(XCopaBase): DATASET_NAME = "vi" + class XCopaZh(XCopaBase): DATASET_NAME = "zh" + XCOPA_TASKS = [ XCopaId, XCopaIt, @@ -77,12 +87,9 @@ class XCopaZh(XCopaBase): XCopaZh, ] + def construct_tasks() -> typing.Dict[str, XCopaBase]: - """ - Returns a dictionary of tasks keyed by task name, for example: - "xcopa/id": XCopaId - will dispatch to the GEM WikiLingua Arabic class. 
- """ + """Returns a dictionary of tasks keyed by task name as: `"xcopa_{lang}": XCopaLang`""" tasks = {} for task_class in XCOPA_TASKS: benchmark = task_class.DATASET_PATH diff --git a/setup.py b/setup.py index 8ca5f827e4..51f7566a2f 100644 --- a/setup.py +++ b/setup.py @@ -22,8 +22,7 @@ "tqdm-multiprocess==0.0.11", "accelerate@git+https://github.com/huggingface/accelerate@main", "transformers@git+https://github.com/huggingface/transformers@main", - #"promptsource@git+https://github.com/bigscience-workshop/promptsource@eval-hackathon", - # install promptsource manually to ensure it's up-to-date with the correct branch + "promptsource@git+https://github.com/bigscience-workshop/promptsource@eval-hackathon", ] dependency_links = []