diff --git a/giskard/rag/metrics/correctness.py b/giskard/rag/metrics/correctness.py index e2937838cb..ed3d3e333d 100644 --- a/giskard/rag/metrics/correctness.py +++ b/giskard/rag/metrics/correctness.py @@ -109,13 +109,21 @@ def __call__(self, question_sample: dict, answer: AgentAnswer) -> dict: temperature=0, format="json_object", ) - return parse_json_output( + + json_output = parse_json_output( out.content, llm_client=llm_client, keys=["correctness", "correctness_reason"], caller_id=self.__class__.__name__, ) + if "correctness" in json_output and not isinstance(json_output["correctness"], bool): + raise LLMGenerationError( + f"Error in correctness evaluation: {json_output['correctness']}. Please make sure the agent answer is correctly formatted." + ) + + return json_output + except Exception as err: raise LLMGenerationError("Error while evaluating the agent") from err