Pacific-AI-Corp · luca-martial · May 9, 2023 · May 8, 2023 · May 8, 2023 · May 8, 2023
diff --git a/nlptest/transform/representation.py b/nlptest/transform/representation.py
@@ -29,7 +29,7 @@ class BaseRepresentation(ABC):
         based on the implemented representation measure.
     """
     alias_name = None
-    supported_tasks = ["ner", "text-classification"]
+    supported_tasks = ["ner", "text-classification", "question-answering"]
 
     @staticmethod
     @abstractmethod
@@ -80,6 +80,8 @@ class GenderRepresentation(BaseRepresentation):
         "min_gender_representation_count",
         "min_gender_representation_proportion"
     ]
+    # Declared explicitly; same entries as BaseRepresentation.supported_tasks.
+    supported_tasks = ["ner", "text-classification", "question-answering"]
 
     def transform(test, data, params):
         """
@@ -155,30 +157,42 @@ async def run(sample_list: List[Sample], model: ModelFactory, **kwargs) -> List[
         """
 
         progress = kwargs.get("progress_bar", False)
 
         classifier = GenderClassifier()
-        genders = [classifier.predict(sample.original)
-                   for sample in kwargs['raw_data']]
+
+        # Classify every raw sample exactly once, choosing the text field
+        # from that sample's own annotations: types declaring "task" expose
+        # original_context / original_question instead of original. Building
+        # the list incrementally also keeps it defined for empty raw_data.
+        genders = []
+        for raw_sample in kwargs['raw_data']:
+            if "task" not in raw_sample.__annotations__:
+                text = raw_sample.original
+            elif "perturbed_context" in raw_sample.__annotations__:
+                text = raw_sample.original_context
+            else:
+                text = raw_sample.original_question
+            genders.append(classifier.predict(text))
 
         gender_counts = {
             "male": len([x for x in genders if x == "male"]),
             "female": len([x for x in genders if x == "female"]),
             "unknown": len([x for x in genders if x == "unknown"])
         }
 
         total_samples = len(kwargs['raw_data'])
 
         for sample in sample_list:
             if progress:
                 progress.update(1)
             if sample.test_type == "min_gender_representation_proportion":
                 sample.actual_results = MinScoreOutput(
                     min_score=round(gender_counts[sample.test_case]/total_samples, 2))
                 sample.state = "done"
             elif sample.test_type == "min_gender_representation_count":
                 sample.actual_results = MinScoreOutput(
                     min_score=round(gender_counts[sample.test_case], 2))
                 sample.state = "done"
         return sample_list
 
 
@@ -196,6 +210,8 @@ class EthnicityRepresentation(BaseRepresentation):
         "min_ethnicity_name_representation_count",
         "min_ethnicity_name_representation_proportion"
     ]
+    # Declared explicitly; same entries as BaseRepresentation.supported_tasks.
+    supported_tasks = ["ner", "text-classification", "question-answering"]
 
     def transform(test, data, params):
         """
@@ -324,6 +340,8 @@ class LabelRepresentation(BaseRepresentation):
         "min_label_representation_count",
         "min_label_representation_proportion"
     ]
+    # Unlike BaseRepresentation, "question-answering" is not listed here.
+    supported_tasks = ["ner", "text-classification"]
 
     def transform(test, data, params):
         """
@@ -454,7 +472,7 @@ class ReligionRepresentation(BaseRepresentation):
         "min_religion_name_representation_count",
         "min_religion_name_representation_proportion"
     ]
-    supported_tasks = ["ner", "text-classification"]
+    supported_tasks = ["ner", "text-classification", "question-answering"]
 
     def transform(test, data, params):
         """
@@ -594,6 +612,8 @@ class CountryEconomicRepresentation(BaseRepresentation):
         "min_country_economic_representation_count",
         "min_country_economic_representation_proportion"
     ]
+    # Declared explicitly; same entries as BaseRepresentation.supported_tasks.
+    supported_tasks = ["ner", "text-classification", "question-answering"]
 
     def transform(test, data, params):
         """

diff --git a/nlptest/transform/utils.py b/nlptest/transform/utils.py
@@ -7071,6 +7071,11 @@ def get_country_economic_representation_dict(data: List[Sample]) -> Dict[str, in
             words = [x.span.word for x in sample.expected_results.predictions]
         elif isinstance(sample.expected_results, SequenceClassificationOutput):
             words = sample.original.split()
+        elif "perturbed_context" in sample.__annotations__:
+            # The sample type declares a context field: tokenize the context.
+            words = sample.original_context.split()
+        else:
+            words = sample.original_question.split()
         for i in words:
             if check_name(i, [country_economic_dict['High-income']]):
                 country_economic_representation["high_income"] += 1
@@ -7101,6 +7106,11 @@ def get_religion_name_representation_dict(data: List[Sample]) -> Dict[str, int]:
             words = [x.span.word for x in sample.expected_results.predictions]
         elif isinstance(sample.expected_results, SequenceClassificationOutput):
             words = sample.original.split()
+        elif "perturbed_context" in sample.__annotations__:
+            # The sample type declares a context field: tokenize the context.
+            words = sample.original_context.split()
+        else:
+            words = sample.original_question.split()
         for i in words:
             if check_name(i, [religion_wise_names['Muslim']]):
                 religion_representation["muslim"] += 1
@@ -7136,6 +7146,11 @@ def get_ethnicity_representation_dict(data: List[Sample]) -> Dict[str, int]:
             words = [x.span.word for x in sample.expected_results.predictions]
         elif isinstance(sample.expected_results, SequenceClassificationOutput):
             words = sample.original.split()
+        elif "perturbed_context" in sample.__annotations__:
+            # The sample type declares a context field: tokenize the context.
+            words = sample.original_context.split()
+        else:
+            words = sample.original_question.split()
         for i in words:
             if check_name(i, [white_names['first_names'], white_names['last_names']]):
                 ethnicity_representation["white"] += 1
@@ -7165,7 +7180,10 @@ def get_entity_representation_proportions(entity_representation):
     total_entities = sum(entity_representation.values())
     entity_representation_proportion = {}
     for k, v in entity_representation.items():
-        entity_representation_proportion[k] = v / total_entities
+        # When the counts sum to zero, report 0 for every key instead of
+        # dividing by zero.
+        entity_representation_proportion[k] = (
+            0 if total_entities == 0 else v / total_entities)
 
     return entity_representation_proportion