diff --git a/src/evaluate/module.py b/src/evaluate/module.py
index b86f2817b..cbb5b8853 100644
--- a/src/evaluate/module.py
+++ b/src/evaluate/module.py
@@ -972,7 +972,6 @@ def compute(self, predictions=None, references=None, **kwargs):
 
         for evaluation_module in self.evaluation_modules:
             batch = {"predictions": predictions, "references": references, **kwargs}
-            batch = {input_name: batch[input_name] for input_name in evaluation_module._feature_names()}
             results.append(evaluation_module.compute(**batch))
 
         return self._merge_results(results)
diff --git a/tests/test_metric.py b/tests/test_metric.py
index 6febddcfe..598b0f929 100644
--- a/tests/test_metric.py
+++ b/tests/test_metric.py
@@ -746,3 +746,14 @@ def test_modules_from_string(self):
         self.assertDictEqual(
             expected_result, combined_evaluation.compute(predictions=predictions, references=references)
         )
+
+    def test_modules_from_string_poslabel(self):
+        expected_result = {"recall": 1.0, "precision": 0.5}
+        predictions = [0, 1, 0]
+        references = [1, 1, 0]
+
+        combined_evaluation = combine(["recall", "precision"])
+
+        self.assertDictEqual(
+            expected_result, combined_evaluation.compute(predictions=predictions, references=references, pos_label=0)
+        )