1616from key_manager import KeyManager
1717from utils import (
1818 verify_access_key ,
19- check_rate_limit_openai ,
19+ check_rate_limit_chat ,
2020 check_rate_limit
2121)
2222
@@ -128,8 +128,10 @@ async def proxy_endpoint(
128128 request , path , api_key , is_stream , is_completion
129129 )
130130
131- except Exception as e :
131+ except ( Exception , HTTPException ) as e :
132132 logger .error ("Error proxying request: %s" , str (e ))
133+ if isinstance (e , HTTPException ):
134+ raise e
133135 raise HTTPException (status_code = 500 , detail = f"Proxy error: { str (e )} " ) from e
134136
135137
@@ -143,25 +145,27 @@ async def handle_completions(
143145 """Handle chat completions using the OpenAI client."""
144146 try :
145147 # Extract headers to forward
146- forward_headers = {}
147- for k , v in request .headers .items ():
148- if k .lower () in ["http-referer" , "x-title" ]:
149- forward_headers [k ] = v
148+ forward_headers = {
149+ k : v
150+ for k , v in request .headers .items ()
151+ if k .lower ()
152+ not in ["host" , "content-length" , "connection" , "authorization" ]
153+ }
150154
151155 # Create a copy of the request body to modify
152156 completion_args = request_body .copy ()
153157
158+ # Ensure we don't pass 'stream' twice
159+ if "stream" in completion_args :
160+ del completion_args ["stream" ]
161+
154162 # Move non-standard parameters that OpenAI SDK doesn't support directly to extra_body
155163 extra_body = {}
156164 openai_unsupported_params = ["include_reasoning" , "transforms" , "route" , "provider" ]
157165 for param in openai_unsupported_params :
158166 if param in completion_args :
159167 extra_body [param ] = completion_args .pop (param )
160168
161- # Ensure we don't pass 'stream' twice
162- if "stream" in completion_args :
163- del completion_args ["stream" ]
164-
165169 # Create a properly formatted request to the OpenAI API
166170 if is_stream :
167171 logger .info ("Making streaming chat completion request" )
@@ -186,7 +190,7 @@ async def stream_response() -> AsyncGenerator[bytes, None]:
186190 logger .error ("Error in streaming response: %s" , err )
187191 # Check if this is a rate limit error
188192 if api_key :
189- has_rate_limit_error_ , reset_time_ms_ = check_rate_limit_openai (err )
193+ has_rate_limit_error_ , reset_time_ms_ = check_rate_limit_chat (err )
190194 if has_rate_limit_error_ :
191195 logger .warning ("Rate limit detected in stream. Disabling key." )
192196 await key_manager .disable_key (
@@ -221,26 +225,30 @@ async def stream_response() -> AsyncGenerator[bytes, None]:
221225 )
222226 except (APIError , Exception ) as e :
223227 logger .error ("Error in chat completions: %s" , str (e ))
224- # Check if this is a rate limit error
225- if api_key and isinstance (e , APIError ):
226- has_rate_limit_error , reset_time_ms = check_rate_limit_openai (e )
227- if has_rate_limit_error :
228- logger .warning ("Rate limit detected in stream. Disabling key." )
229- await key_manager .disable_key (api_key , reset_time_ms )
230-
231- # Try again with a new key
232- new_api_key = await key_manager .get_next_key ()
233- if new_api_key :
234- new_client = await get_openai_client (new_api_key )
235- return await handle_completions (
236- new_client , request , request_body , new_api_key , is_stream
237- )
238-
228+ code = 500
229+ detail = f"Error processing chat completion: { str (e )} "
230+ if isinstance (e , APIError ):
231+ # Check if this is a rate limit error
232+ if api_key :
233+ has_rate_limit_error , reset_time_ms = check_rate_limit_chat (e )
234+ if has_rate_limit_error :
235+ logger .warning ("Rate limit detected in stream. Disabling key." )
236+ await key_manager .disable_key (api_key , reset_time_ms )
237+
238+ # Try again with a new key
239+ new_api_key = await key_manager .get_next_key ()
240+ if new_api_key :
241+ new_client = await get_openai_client (new_api_key )
242+ return await handle_completions (
243+ new_client , request , request_body , new_api_key , is_stream
244+ )
245+ code = e .code or code
246+ detail = e .body or detail
239247 # Raise the exception
240- raise HTTPException (500 , f"Error processing chat completion: { str ( e ) } " ) from e
248+ raise HTTPException (code , detail ) from e
241249
242250
243- async def _check_httpx_err (body : str or bytes , api_key : str or None ):
251+ async def _check_httpx_err (body : str | bytes , api_key : str | None ):
244252 if api_key and (isinstance (body , str ) and body .startswith ("data: " ) or (
245253 isinstance (body , bytes ) and body .startswith (b"data: " ))):
246254 body = body [6 :]
0 commit comments