bigscience-workshop · haileyschoelkopf · Jul 5, 2022 · Jul 16, 2022 · Jul 16, 2022 · Jul 16, 2022
@@ -33,6 +33,7 @@
 from . import tydiqa
 from . import wino_bias
 from . import wmt
+from . import xcopa
 from . import xquad
 
 
@@ -79,7 +80,7 @@
     "hans": hans.HANS,
     # CNN Daily Mail
     "cnn_dailymail": cnn_dailymail.CnnDailyMail,
-    # GEM/xum
+    # GEM/xsum
     "gem_xsum": gem_xsum.GEMXSUM,
     "gem_xsum_challenge_sample": gem_xsum.GEMXSUMChallgeSample,
     "gem_xsum_challenge_test_backtranslation": gem_xsum.GEMXSUMChallgeTestBacktranslation,
@@ -198,6 +199,8 @@
     # TyDi QA
     "tydiqa_primary": tydiqa.TyDiQAPrimaryClassification,
     "tydiqa_secondary": tydiqa.TyDiQAGoldPGeneration,
+    # XCOPA
+    **xcopa.construct_tasks(),
     #######################################################
     # TODO: Not Yet Available in `promptsource/eval-hackathon`
     ########################################################

@@ -0,0 +1,85 @@
+"""
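+XCOPA: A Multilingual Dataset for Causal Commonsense Reasoning (one task class per language).
+Homepage: https://github.com/cambridgeltl/xcopa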
+"""
+import typing
+
+from lm_eval.api.task import PromptSourceTask
+
+
+_CITATION = """
+TODO: add
+"""
+
+class XCopaBase(PromptSourceTask):
+    VERSION = 0
+    DATASET_PATH = "xcopa"
+    DATASET_NAME = None
+
+    def has_training_docs(self):
+        return False
+
+    def has_validation_docs(self):
+        return True
+
+    def has_test_docs(self):
+        return True
+
+    def validation_docs(self):
+        return self.dataset["validation"]
+
+    def test_docs(self):
+        return self.dataset["test"]
+
+    def invalid_doc_for_prompt(self, doc) -> bool:
+        # HACK: Some copa templates contain conditionals that skip documents
+        # when the condition is not met, like `{if doc['question'] != "cause"}`.
+        # For such documents the prompt never produces an input and target.
+        try:
+            result = self.prompt_template.apply(doc)
+            if result == ['']:
+                return True
+            else:
+                return False
+        except Exception:
+            return True
+
+class XCopaId(XCopaBase):
+    DATASET_NAME = "id"
+
+class XCopaIt(XCopaBase):
+    DATASET_NAME = "it"
+
+class XCopaSw(XCopaBase):
+    DATASET_NAME = "sw"
+
+class XCopaTa(XCopaBase):
+    DATASET_NAME = "ta"
+
+class XCopaVi(XCopaBase):
+    DATASET_NAME = "vi"
+
+class XCopaZh(XCopaBase):
+    DATASET_NAME = "zh"
+
+XCOPA_TASKS = [
+    XCopaId,
+    XCopaIt,
+    XCopaSw,
+    XCopaTa,
+    XCopaVi,
+    XCopaZh,
+]
+
+def construct_tasks() -> typing.Dict[str, XCopaBase]:
+    """
+    Returns a dictionary of task classes keyed by task name, for example:
+        "xcopa_id": XCopaId
+    maps the task name "xcopa_id" to the XCopaId class.
+    """
+    tasks = {}
+    for task_class in XCOPA_TASKS:
+        benchmark = task_class.DATASET_PATH
+        lang = task_class.DATASET_NAME
+        tasks[f"{benchmark}_{lang}"] = task_class
+    return tasks
diff --git a/setup.py b/setup.py
@@ -22,7 +22,8 @@
     "tqdm-multiprocess==0.0.11",
     "accelerate@git+https://github.com/huggingface/accelerate@main",
     "transformers@git+https://github.com/huggingface/transformers@main",
-    "promptsource@git+https://github.com/bigscience-workshop/promptsource@eval-hackathon",
+    # "promptsource@git+https://github.com/bigscience-workshop/promptsource@eval-hackathon",
+    # NOTE: install promptsource manually from the branch above so that it stays up to date.
 ]
 dependency_links = []