#!/usr/bin/env python3
"""Minimal repro of the agentic incident investigation eval."""

import json
import requests

# Chat-completions endpoint on a local server (port 8080); run_scenario POSTs
# every request here.
URL = "http://localhost:8080/v1/chat/completions"
# Model identifier sent as the "model" field of each request payload.
MODEL = "gpt-oss-120b"

# Function-tool schemas advertised to the model on every request.  Each entry
# names one tool, describes it, and gives a JSON-Schema object for its
# arguments; "service" is required by all five tools.
TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "search_logs",
            "description": "Search application logs for a service within a time range",
            "parameters": {
                "type": "object",
                "properties": {
                    "service": {"type": "string"},
                    "query": {"type": "string"},
                    "start_time": {"type": "string"},
                    "end_time": {"type": "string"},
                },
                "required": ["service"],
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "get_service_status",
            "description": "Get the current health status and dependency information for a service",
            "parameters": {
                "type": "object",
                "properties": {
                    "service": {"type": "string"},
                },
                "required": ["service"],
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "query_metrics",
            "description": "Query time-series metrics for a service",
            "parameters": {
                "type": "object",
                "properties": {
                    "service": {"type": "string"},
                    "metric": {"type": "string"},
                    "start_time": {"type": "string"},
                    "end_time": {"type": "string"},
                },
                "required": ["service", "metric"],
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "list_recent_deployments",
            "description": "List recent deployments for a service",
            "parameters": {
                "type": "object",
                "properties": {
                    "service": {"type": "string"},
                    "limit": {"type": "integer"},
                },
                "required": ["service"],
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "get_config",
            "description": "Get the current runtime configuration for a service",
            "parameters": {
                "type": "object",
                "properties": {
                    "service": {"type": "string"},
                    "section": {"type": "string"},
                },
                "required": ["service"],
            },
        },
    },
]

# Canned tool outputs, keyed by tool name (the same names declared in TOOLS).
# Each value is a pre-serialized JSON string that run_scenario returns verbatim
# as the "tool" message content; the lookup uses only the tool name, so the
# arguments the model supplies do not change the result.
TOOL_RESPONSES = {
    # Five checkout-service log entries (newest first) plus total_count/truncated.
    "search_logs": '{"results":[{"timestamp":"2024-01-15T14:35:12Z","level":"ERROR","service":"checkout-service","message":"Payment processing failed: connection refused to payment-processor-v2.internal:8443","trace_id":"abc-123-def"},{"timestamp":"2024-01-15T14:34:58Z","level":"ERROR","service":"checkout-service","message":"HTTP 503 from upstream: payment-processor-v2.internal:8443 - connection refused","trace_id":"abc-124-def"},{"timestamp":"2024-01-15T14:32:45Z","level":"WARN","service":"checkout-service","message":"Feature flag evaluation: use_payment_v2 = true, routing to payment-processor-v2","trace_id":"abc-125-def"},{"timestamp":"2024-01-15T14:32:30Z","level":"INFO","service":"checkout-service","message":"Configuration reloaded: payment processor endpoint changed to payment-processor-v2.internal:8443","trace_id":"abc-126-def"},{"timestamp":"2024-01-15T14:30:15Z","level":"INFO","service":"checkout-service","message":"Deployment v2.14.0 rollout complete, feature flags refreshed","trace_id":"abc-127-def"}],"total_count":1847,"truncated":true}',
    # Service health summary: status "degraded", with payment-processor-v2 the only unreachable dependency.
    "get_service_status": '{"service":"checkout-service","status":"degraded","uptime":"14d 3h 22m","instances":{"total":5,"healthy":5,"unhealthy":0},"dependencies":[{"name":"postgres-primary","status":"healthy","latency_ms":2},{"name":"redis-cache","status":"healthy","latency_ms":1},{"name":"payment-processor-v1","status":"healthy","latency_ms":45},{"name":"payment-processor-v2","status":"unreachable","error":"connection refused: payment-processor-v2.internal:8443"},{"name":"inventory-service","status":"healthy","latency_ms":12}],"error_rate_1m":15.3,"last_deploy":"2024-01-15T14:30:00Z"}',
    # error_rate series (percent) from 14:00 to 14:40; this same series is returned whatever metric is requested.
    "query_metrics": '{"service":"checkout-service","metric":"error_rate","unit":"percent","datapoints":[{"timestamp":"2024-01-15T14:00:00Z","value":0.1},{"timestamp":"2024-01-15T14:10:00Z","value":0.1},{"timestamp":"2024-01-15T14:20:00Z","value":0.1},{"timestamp":"2024-01-15T14:30:00Z","value":0.3},{"timestamp":"2024-01-15T14:32:00Z","value":8.7},{"timestamp":"2024-01-15T14:34:00Z","value":14.9},{"timestamp":"2024-01-15T14:36:00Z","value":15.3},{"timestamp":"2024-01-15T14:38:00Z","value":15.1},{"timestamp":"2024-01-15T14:40:00Z","value":15.3}]}',
    # Three deployments, newest first; v2.14.0 at 14:30 enabled the payment processor v2 feature flag.
    "list_recent_deployments": '{"service":"checkout-service","deployments":[{"version":"v2.14.0","deployed_at":"2024-01-15T14:30:00Z","deployed_by":"ci-pipeline","status":"completed","changelog":"Enable payment processor v2 feature flag","commit":"a1b2c3d","rollback_available":true},{"version":"v2.13.2","deployed_at":"2024-01-14T09:15:00Z","deployed_by":"ci-pipeline","status":"completed","changelog":"Fix cart total rounding for JPY currency","commit":"e4f5g6h","rollback_available":true},{"version":"v2.13.1","deployed_at":"2024-01-12T16:45:00Z","deployed_by":"ci-pipeline","status":"completed","changelog":"Add retry logic for inventory checks","commit":"i7j8k9l","rollback_available":false}]}',
    # Production config snapshot: payment settings (active_processor "v2"), feature flags, rate limits.
    "get_config": '{"service":"checkout-service","environment":"production","config":{"payment":{"processor_version":"v2","v1_endpoint":"payment-processor-v1.internal:8443","v2_endpoint":"payment-processor-v2.internal:8443","active_processor":"v2","timeout_ms":5000,"retry_count":3},"feature_flags":{"use_payment_v2":true,"enable_new_cart_ui":false,"async_inventory_check":true},"rate_limits":{"checkout_per_minute":1000,"payment_per_minute":500}},"last_updated":"2024-01-15T14:30:00Z"}',
}

# System prompt: sets the SRE persona, mentions the available tool categories,
# and lists the four sections the final root cause analysis must contain.
# The fragments are joined with single spaces into one line of text.
SYSTEM = " ".join([
    "You are an experienced Site Reliability Engineer (SRE) investigating a production incident.",
    "You have access to tools for searching logs, checking service status, querying metrics,",
    "listing deployments, and viewing configuration.",
    "Investigate the issue systematically using these tools to gather evidence.",
    "Once you have enough information, provide a root cause analysis including:",
    "1. What is failing and the impact",
    "2. The root cause",
    "3. Recommended immediate fix",
    "4. Timeline of events",
])

# Opening user message: the incident report that starts every run.  The
# sentences are joined with single spaces into one line of text.
USER = " ".join([
    "URGENT: The checkout-service is returning HTTP 500 errors in production.",
    "Our monitoring shows the error rate spiked from 0.1% to approximately 15% starting at 14:32 UTC today.",
    "Customers are unable to complete purchases. Please investigate and determine the root cause.",
])

def _accumulate_stream(lines):
    """Fold the SSE lines of one streamed chat completion into a single message.

    Args:
        lines: Iterable of raw lines from the response body, each either
            ``bytes`` or ``str``.

    Returns:
        A ``(content, reasoning_content, tool_calls)`` tuple: the two
        concatenated text streams, and the tool calls rebuilt from their
        deltas, ordered by their stream ``index``.

    Raises:
        UnicodeDecodeError: If a ``bytes`` line is not valid UTF-8.
        json.JSONDecodeError: If a ``data:`` payload is not valid JSON.
    """
    content_parts, reasoning_parts = [], []
    tc_by_idx = {}

    for raw in lines:
        # Event streams are UTF-8.  Decoding here avoids relying on the
        # charset requests guesses from the headers (ISO-8859-1 for text/*
        # responses without an explicit charset), which garbles non-ASCII text.
        line = raw.decode("utf-8") if isinstance(raw, bytes) else raw
        if not line.startswith("data:"):
            continue  # blank separators, comments and other SSE fields
        # The space after "data:" is optional in SSE, so strip rather than
        # require it.
        data = line[len("data:"):].strip()
        if not data:
            continue
        if data == "[DONE]":
            break
        chunk = json.loads(data)
        # `or` guards: treat keys that are present but null like missing ones.
        for choice in chunk.get("choices") or []:
            delta = choice.get("delta") or {}
            if delta.get("content"):
                content_parts.append(delta["content"])
            if delta.get("reasoning_content"):
                reasoning_parts.append(delta["reasoning_content"])
            for tcd in delta.get("tool_calls") or []:
                # Fragments of one tool call share an index; concatenate them.
                entry = tc_by_idx.setdefault(
                    tcd["index"],
                    {"id": "", "type": "function", "function": {"name": "", "arguments": ""}},
                )
                if tcd.get("id"):
                    entry["id"] = tcd["id"]
                fn = tcd.get("function") or {}
                if fn.get("name"):
                    entry["function"]["name"] += fn["name"]
                if fn.get("arguments"):
                    entry["function"]["arguments"] += fn["arguments"]

    tool_calls = [tc_by_idx[k] for k in sorted(tc_by_idx)]
    return "".join(content_parts), "".join(reasoning_parts), tool_calls


def run_scenario(run_num, *, max_iterations=25):
    """Run one investigation conversation against URL and print a transcript.

    Starts from the SYSTEM and USER prompts.  Each iteration POSTs the
    conversation so far with streaming enabled, prints the (truncated)
    reasoning and any tool calls, and then either:

    * prints the final response and stops, when the model made no tool calls; or
    * appends the assistant message (including ``reasoning_content``) and one
      canned result from TOOL_RESPONSES per tool call, and loops again.

    Args:
        run_num: Label printed in the run banner and iteration headers.
        max_iterations: Maximum number of request/response rounds before
            giving up (default 25).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        json.JSONDecodeError: If a streamed ``data:`` payload is not valid JSON.
    """
    banner = "=" * 60
    print(f"\n{banner}")
    print(f"RUN {run_num}")
    print(banner)

    messages = [
        {"role": "system", "content": SYSTEM},
        {"role": "user", "content": USER},
    ]

    for i in range(max_iterations):
        print(f"\n--- run {run_num} / iteration {i+1} ---")

        payload = {
            "model": MODEL,
            "messages": messages,
            "tools": TOOLS,
            "tool_choice": "auto",
            "stream": True,
            "stream_options": {"include_usage": True},
        }
        # The context manager releases the connection even though reading
        # stops at [DONE] (or parsing raises) before the body is drained.
        with requests.post(URL, json=payload, stream=True, timeout=120) as resp:
            resp.raise_for_status()
            content, reasoning_content, tool_calls = _accumulate_stream(resp.iter_lines())

        if reasoning_content:
            print(f"  reasoning: {reasoning_content[:120]}...")
        for tc in tool_calls:
            print(f"  tool_call: {tc['function']['name']}({tc['function']['arguments']})")

        if not tool_calls:
            print(f"\n--- FINAL RESPONSE ---\n{content}")
            break

        # Append assistant message with reasoning_content passed back
        assistant_msg = {"role": "assistant"}
        if reasoning_content:
            assistant_msg["reasoning_content"] = reasoning_content
        if content:
            assistant_msg["content"] = content
        assistant_msg["tool_calls"] = tool_calls
        messages.append(assistant_msg)

        # Append tool results
        for tc in tool_calls:
            messages.append({
                "role": "tool",
                "tool_call_id": tc["id"],
                "content": TOOL_RESPONSES.get(tc["function"]["name"], '{"error":"unknown tool"}'),
            })
    else:
        # Every round ended in tool calls; say so instead of ending silently.
        print(f"\n--- run {run_num}: no final response after {max_iterations} iterations ---")


# Run the scenario twice, labelled run 1 and run 2.
for run_num in (1, 2):
    run_scenario(run_num)
