Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions eval_protocol/mcp/execution/base_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,9 @@ async def _generate_live_tool_calls(
if message.get("tool_calls"):
assistant_message_for_history["tool_calls"] = message["tool_calls"]

if message.get("reasoning_details"):
assistant_message_for_history["reasoning_details"] = message["reasoning_details"]
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: field-name mismatch — `reasoning_details` vs. `reasoning_content`

The new code uses `reasoning_details` as the field name, but the rest of the codebase (including the `Message` model in `eval_protocol/models.py` and the langchain adapter) uses `reasoning_content`. LiteLLM's documentation also uses `reasoning_content`. If LiteLLM returns the reasoning in a field called `reasoning_content` via `provider_specific_fields`, then `message.get("reasoning_details")` would return `None` and the reasoning content would not be captured. Similarly, adding `reasoning_details` to `allowed_fields` would not preserve the actual `reasoning_content` field.

Additional Locations (1)

Fix in Cursor Fix in Web


# Add to actual conversation history
conversation_history.append(assistant_message_for_history)

Expand Down
53 changes: 30 additions & 23 deletions eval_protocol/mcp/execution/policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def _clean_messages_for_api(self, messages: List[Dict]) -> List[Dict]:
Clean messages with only OpenAI API compatible fields
"""
# Standard OpenAI message fields
allowed_fields = {"role", "content", "tool_calls", "tool_call_id", "name"}
allowed_fields = {"role", "content", "tool_calls", "tool_call_id", "name", "reasoning_details"}

clean_messages = []
for msg in messages:
Expand Down Expand Up @@ -217,31 +217,38 @@ async def _make_llm_call(self, messages: List[Dict[str, Any]], tools: List[Dict[
logger.debug(f"🔄 API call for model: {self.model_id}")

# LiteLLM already returns OpenAI-compatible format
message_obj = getattr(response.choices[0], "message", object())

message_dict: Dict[str, Any] = {
"role": getattr(message_obj, "role", "assistant"),
"content": getattr(message_obj, "content", None),
"tool_calls": (
[
{
"id": getattr(tc, "id", None),
"type": getattr(tc, "type", "function"),
"function": {
"name": getattr(getattr(tc, "function", None), "name", "tool"),
"arguments": getattr(getattr(tc, "function", None), "arguments", "{}"),
},
}
for tc in (getattr(message_obj, "tool_calls", []) or [])
]
if getattr(message_obj, "tool_calls", None)
else []
),
}

provider_specific_fields = getattr(message_obj, "provider_specific_fields", None)
if isinstance(provider_specific_fields, dict):
for key, value in provider_specific_fields.items():
if value is not None and key not in message_dict:
message_dict[key] = value

return {
"choices": [
{
"message": {
"role": getattr(getattr(response.choices[0], "message", object()), "role", "assistant"),
"content": getattr(getattr(response.choices[0], "message", object()), "content", None),
"tool_calls": (
[
{
"id": getattr(tc, "id", None),
"type": getattr(tc, "type", "function"),
"function": {
"name": getattr(getattr(tc, "function", None), "name", "tool"),
"arguments": getattr(getattr(tc, "function", None), "arguments", "{}"),
},
}
for tc in (
getattr(getattr(response.choices[0], "message", object()), "tool_calls", [])
or []
)
]
if getattr(getattr(response.choices[0], "message", object()), "tool_calls", None)
else []
),
},
"message": message_dict,
"finish_reason": getattr(response.choices[0], "finish_reason", None),
}
],
Expand Down
Loading