Improve error handling

Aculeasis · Aculeasis · commit 513096094d83 · 2025-03-20T15:13:55.000+03:00
diff --git a/config.yml.example b/config.yml.example
@@ -16,4 +16,4 @@ openrouter:
     - "sk-or-v1-your-third-api-key"
   
   # Time in seconds to temporarily disable a key when rate limit is reached
-  rate_limit_cooldown: 7200  # 2 hours 
+  rate_limit_cooldown: 14400  # 4 hours
diff --git a/key_manager.py b/key_manager.py
@@ -14,7 +14,6 @@
 from config import logger
 
 
-@staticmethod
 def _mask_key(key: str) -> str:
     """Mask an API key for logging purposes."""
     if len(key) <= 8:
diff --git a/routes.py b/routes.py
@@ -9,14 +9,15 @@
 import httpx
 from fastapi import APIRouter, Request, Header, HTTPException
 from fastapi.responses import StreamingResponse, Response
-from openai import AsyncOpenAI
+from openai import AsyncOpenAI, APIError
 
 from config import config, logger
 from constants import OPENROUTER_BASE_URL, PUBLIC_ENDPOINTS, BINARY_ENDPOINTS
 from key_manager import KeyManager
 from utils import (
     verify_access_key,
     check_rate_limit_error,
+    check_rate_limit_openai,
 )
 
 # Create router
@@ -121,7 +122,7 @@ async def proxy_endpoint(
 
     except Exception as e:
         logger.error("Error proxying request: %s", str(e))
-        raise HTTPException(status_code=500, detail=f"Proxy error: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Proxy error: {str(e)}") from e
 
 
 async def handle_chat_completions(
@@ -173,14 +174,17 @@ async def stream_response() -> AsyncGenerator[bytes, None]:
 
                     # Send the end marker
                     yield b"data: [DONE]\n\n"
-                except Exception as e:
-                    logger.error("Error in streaming response: %s", str(e))
+                except APIError as err:
+                    logger.error("Error in streaming response: %s", err)
                     # Check if this is a rate limit error
-                    if "rate limit" in str(e).lower() and api_key:
-                        logger.warning("Rate limit detected in stream. Disabling key.")
-                        await key_manager.disable_key(
-                            api_key, None
-                        )  # Disable without reset time
+                    if api_key:
+                        has_rate_limit_error_, reset_time_ms_ = check_rate_limit_openai(err)
+                        if has_rate_limit_error_:
+                            logger.warning("Rate limit detected in stream. Disabling key.")
+                            await key_manager.disable_key(
+                                api_key, reset_time_ms_
+                            )
+
 
             # Return a streaming response
             return StreamingResponse(
@@ -199,29 +203,33 @@ async def stream_response() -> AsyncGenerator[bytes, None]:
             **completion_args, extra_headers=forward_headers, extra_body=extra_body
         )
 
+        result = response.model_dump()
+        if 'error' in result:
+            raise APIError(result['error'].get("message", "Error"), None, body=result['error'])
+
         # Return the response as JSON
         return Response(
-            content=json.dumps(response.model_dump()), media_type="application/json"
+            content=json.dumps(result), media_type="application/json"
         )
-    except Exception as e:
+    except (APIError, Exception) as e:
         logger.error("Error in chat completions: %s", str(e))
         # Check if this is a rate limit error
-        if "rate limit" in str(e).lower() and api_key:
-            logger.warning(
-                "Rate limit reached for API key. Disabling key and retrying."
-            )
-            await key_manager.disable_key(api_key, None)
-
-            # Try again with a new key
-            new_api_key = await key_manager.get_next_key()
-            if new_api_key:
-                new_client = await get_openai_client(new_api_key)
-                return await handle_chat_completions(
-                    new_client, request, request_body, new_api_key, is_stream
-                )
+        if api_key and isinstance(e, APIError):
+            has_rate_limit_error, reset_time_ms = check_rate_limit_openai(e)
+            if has_rate_limit_error:
+                logger.warning("Rate limit detected in stream. Disabling key.")
+                await key_manager.disable_key(api_key, reset_time_ms)
+
+                # Try again with a new key
+                new_api_key = await key_manager.get_next_key()
+                if new_api_key:
+                    new_client = await get_openai_client(new_api_key)
+                    return await handle_chat_completions(
+                        new_client, request, request_body, new_api_key, is_stream
+                    )
 
         # Raise the exception
-        raise HTTPException(500, f"Error processing chat completion: {str(e)}")
+        raise HTTPException(500, f"Error processing chat completion: {str(e)}") from e
 
 
 async def proxy_with_httpx(
@@ -311,9 +319,7 @@ async def proxy_with_httpx(
                         status_code=503,
                         media_type="application/json",
                     )
-                raise HTTPException(
-                    status_code=503, detail="Unable to connect to OpenRouter API"
-                )
+                raise HTTPException(503, "Unable to connect to OpenRouter API") from e
 
             # Handle binary responses
             if is_binary:
@@ -387,17 +393,13 @@ async def stream_sse():
 
         except httpx.ConnectError as e:
             logger.error("Connection error to OpenRouter: %s", str(e))
-            raise HTTPException(
-                status_code=503, detail="Unable to connect to OpenRouter API"
-            )
+            raise HTTPException(503, "Unable to connect to OpenRouter API") from e
         except httpx.TimeoutException as e:
             logger.error("Timeout connecting to OpenRouter: %s", str(e))
-            raise HTTPException(
-                status_code=504, detail="OpenRouter API request timed out"
-            )
+            raise HTTPException(504, "OpenRouter API request timed out") from e
         except Exception as e:
             logger.error("Error proxying request with httpx: %s", str(e))
-            raise HTTPException(status_code=500, detail=f"Proxy error: {str(e)}")
+            raise HTTPException(500, f"Proxy error: {str(e)}") from e
 
 
 @router.get("/health")
diff --git a/test.py b/test.py
@@ -11,9 +11,11 @@
 from openai import AsyncOpenAI  # Use the OpenAI library
 
 
-# Load configuration from config.yml
 def load_config():
-    with open("config.yml", "r") as file:
+    """
+    Load configuration from config.yml
+    """
+    with open("config.yml", encoding="utf-8") as file:
         return yaml.safe_load(file)
 
 # Get configuration
@@ -112,9 +114,9 @@ async def test_openrouter_streaming():
 
         print("\n" + "-" * 50)
         if request_data["stream"]:
-          print("\nStream completed!")
+            print("\nStream completed!")
         else:
-          print("\nNon-streaming response completed!")
+            print("\nNon-streaming response completed!")
 
     except Exception as e:
         print(f"Error occurred during test: {str(e)}")
diff --git a/utils.py b/utils.py
@@ -8,6 +8,7 @@
 
 from fastapi import Header, HTTPException
 from httpx import Response
+from openai import APIError
 
 from config import logger
 from constants import RATE_LIMIT_ERROR_MESSAGE, RATE_LIMIT_ERROR_CODE
@@ -56,6 +57,30 @@ async def verify_access_key(
 
     return True
 
+def check_rate_limit_openai(err: APIError) -> Tuple[bool, Optional[int]]:
+    """
+    Check whether an OpenAI APIError is a rate limit error.
+
+    Args:
+        err: OpenAI APIError; its body may carry metadata.headers["X-RateLimit-Reset"]
+
+    Returns:
+        Tuple (has_rate_limit_error, reset_time_ms); reset_time_ms is None if not found
+    """
+    has_rate_limit_error = False
+    reset_time_ms = None
+
+    if err.code == RATE_LIMIT_ERROR_CODE and isinstance(err.body, dict):
+        try:
+            reset_time_ms = int(err.body["metadata"]["headers"]["X-RateLimit-Reset"])
+            has_rate_limit_error = True
+        except (KeyError, TypeError, ValueError) as parse_err:
+            logger.debug("No usable X-RateLimit-Reset in error body: %r", parse_err)
+    # Fall back to matching the message text when no reset time could be extracted.
+    if reset_time_ms is None and RATE_LIMIT_ERROR_MESSAGE in (err.message or ""):
+        has_rate_limit_error = True
+
+    return has_rate_limit_error, reset_time_ms
 
 def check_rate_limit_error(response: Response) -> Tuple[bool, Optional[int]]:
     """
@@ -70,14 +95,6 @@ def check_rate_limit_error(response: Response) -> Tuple[bool, Optional[int]]:
     has_rate_limit_error = False
     reset_time_ms = None
 
-    # Check headers
-    if "X-RateLimit-Reset" in response.headers:
-        try:
-            reset_time_ms = int(response.headers["X-RateLimit-Reset"])
-            logger.info(f"Found X-RateLimit-Reset in headers: {reset_time_ms}s", )
-        except (ValueError, TypeError) as e:
-            logger.warning(f"Failed to parse X-RateLimit-Reset header: {e}s", )
-
     # Check response content if it's JSON
     content_type = response.headers.get('content-type', '')
     if 'application/json' in content_type:
@@ -93,10 +110,10 @@ def check_rate_limit_error(response: Response) -> Tuple[bool, Optional[int]]:
                         reset_time_ms = int(data[
     "error"]["metadata"]["headers"]["X-RateLimit-Reset"])
                         logger.info(
-    f"Found X-RateLimit-Reset in response metadata: {reset_time_ms}")
+    "Found X-RateLimit-Reset in response metadata: %s", reset_time_ms)
                     except (ValueError, TypeError) as e:
-                        logger.warning(f"Failed to parse X-RateLimit-Reset from metadata: {e}s", )
+                        logger.warning("Failed to parse X-RateLimit-Reset from metadata: %s", e)
         except Exception as e:
-            logger.debug(f"Error parsing JSON response: {e}s", )
+            logger.debug("Error parsing JSON response: %s", e)
 
     return has_rate_limit_error, reset_time_ms