diff --git a/evals/elsuite/basic/includes.py b/evals/elsuite/basic/includes.py index af16600628..ab37c429d9 100644 --- a/evals/elsuite/basic/includes.py +++ b/evals/elsuite/basic/includes.py @@ -24,7 +24,7 @@ def eval_sample(self, sample: Any, *_): self.model_spec, sample["input"], max_tokens=self.max_tokens ) includes_answer = any( - [evals.elsuite.utils.get_answer(sampled, ref) for ref in sample["ideal"]] + [evals.elsuite.utils.get_answer(sampled, ref) is not None for ref in sample["ideal"]] ) evals.record.record_metrics(accuracy=float(includes_answer)) return includes_answer