"""Manual test for streaming LLM endpoints.

Handles both OpenAI-compatible chat-completions servers (llama.cpp,
OpenRouter, etc.) and Ollama's native /api/generate streaming format.
"""
import json
import logging

import requests

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("llm-test")

def get_openai_compatible_playlist_name(server_url, model_name, full_prompt, api_key="no-key-needed"):
    # Heuristic: use the OpenAI chat-completions format when an API key is
    # supplied or the URL looks OpenAI-shaped; otherwise fall back to
    # Ollama's native format. Checking for "chat/completions" covers keyless
    # local OpenAI-compatible servers that the old check misdetected.
    is_openai_format = (
        api_key != "no-key-needed"
        or "chat/completions" in server_url.lower()
        or "openai" in server_url.lower()
        or "openrouter" in server_url.lower()
    )

    headers = {
        "Content-Type": "application/json"
    }

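    # Only attach a bearer token when a real key is supplied; local servers
    # typically don't require one.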
    if api_key and api_key != "no-key-needed":
        headers["Authorization"] = f"Bearer {api_key}"

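    # OpenRouter documents these optional headers for attributing traffic to
    # an app; other servers simply ignore them.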
    if "openrouter" in server_url.lower():
        headers["HTTP-Referer"] = "https://github.com/NeptuneHub/AudioMuse-AI"
        headers["X-Title"] = "AudioMuse-AI"

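    # OpenAI-style servers take a chat "messages" list; Ollama's native
    # /api/generate endpoint takes a flat "prompt" plus an "options" block
    # ("num_predict" is Ollama's equivalent of "max_tokens").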
    if is_openai_format:
        payload = {
            "model": model_name,
            "messages": [{"role": "user", "content": full_prompt}],
            "stream": True,
            "temperature": 0.9,
            "max_tokens": 5000
        }
    else:
        payload = {
            "model": model_name,
            "prompt": full_prompt,
            "stream": True,
            "options": {
                "num_predict": 5000,
                "temperature": 0.9
            }
        }

    try:
        logger.debug(f"POST {server_url}")

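        # stream=True keeps the connection open so chunks can be read as they
        # arrive; the generous timeout accommodates slow local models.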
        response = requests.post(server_url, headers=headers, json=payload, stream=True, timeout=960)
        response.raise_for_status()

        full_raw_response_content = ""

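        # OpenAI-compatible servers stream Server-Sent Events ("data: ..."
        # lines); Ollama streams newline-delimited JSON. Both pass through
        # the same loop below.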
        for line in response.iter_lines():
            if not line:
                continue

            line_str = line.decode('utf-8', errors='ignore').strip()

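            # SSE comment/keep-alive lines start with ':' and carry no data.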
            if line_str.startswith(':'):
                continue

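            # Strip the SSE "data: " prefix; "[DONE]" is OpenAI's
            # end-of-stream sentinel.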
            if line_str.startswith('data: '):
                line_str = line_str[6:]
                if line_str == '[DONE]':
                    break

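            # Non-JSON lines (e.g. stray keep-alives) are skipped rather than
            # aborting the stream (see the JSONDecodeError handler below).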
            try:
                chunk = json.loads(line_str)

                if is_openai_format:
                    if "choices" in chunk and chunk["choices"]:
                        choice = chunk["choices"][0]

                        # Pull content out before honoring finish_reason:
                        # some servers send the final delta and the
                        # finish_reason in the same chunk, and breaking
                        # first would drop that last piece.
                        content_piece = None
                        if "delta" in choice:
                            # chat-completions streaming chunk
                            content_piece = choice["delta"].get("content")
                        elif "text" in choice:
                            # legacy completions streaming chunk
                            content_piece = choice.get("text")

                        if isinstance(content_piece, str):
                            full_raw_response_content += content_piece

                        # Any finish_reason ("stop", "length", ...) ends
                        # the stream, not just "stop".
                        if choice.get("finish_reason"):
                            break

                else:
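                    # Ollama native format: text arrives under "response" and
                    # the final chunk sets "done": true.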
                    if chunk.get("response"):
                        full_raw_response_content += chunk["response"]
                    if chunk.get("done"):
                        break

            except json.JSONDecodeError:
                continue

        extracted_text = full_raw_response_content.strip()

        return extracted_text

    except Exception as e:
        logger.error(f"LLM request failed: {e}")
        return f"Error: {e}"

# ---------------------------------------------------
# Test runner
# ---------------------------------------------------

if __name__ == "__main__":
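    # 172.17.0.1 is Docker's default bridge gateway; this endpoint shape
    # matches Docker Model Runner's llama.cpp engine. The dummy API key also
    # forces the OpenAI-format code path. Adjust these for your setup.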
    SERVER_URL = "http://172.17.0.1:12434/engines/llama.cpp/v1/chat/completions"
    MODEL = "ai/qwen3:0.6B-Q4_0"
    API_KEY = "test-api-key"
    PROMPT = "say hi"

    print("\n=== Testing Local OpenAI-Compatible LLM ===\n")
    result = get_openai_compatible_playlist_name(
        server_url=SERVER_URL,
        model_name=MODEL,
        full_prompt=PROMPT,
        api_key=API_KEY
    )

    print("LLM Response:")
    print(result)
