1111from fastapi import Header , HTTPException
1212
1313from config import config , logger
14- from constants import RATE_LIMIT_ERROR_CODE
14+ from constants import RATE_LIMIT_ERROR_CODE , GOOGLE_LIMIT_ERROR , GLOBAL_LIMIT_ERROR , GLOBAL_LIMIT_PATTERN
1515
1616
1717def get_local_ip () -> str :
@@ -57,7 +57,22 @@ async def verify_access_key(
5757 return True
5858
5959
60- async def is_google_error (data : str ) -> bool :
def check_global_limit(data: str) -> Optional[str]:
    """
    Detect OpenRouter's upstream ("global") rate-limit notice in ``data``.

    The notice is plain text rather than JSON, for example:
        "google/gemini-2.0-flash-exp:free is temporarily rate-limited upstream.
        Please retry shortly, or add your own key to accumulate your rate limits:
        https://openrouter.ai/settings/integrations"

    Returns GLOBAL_LIMIT_ERROR when ``data`` is a string containing
    GLOBAL_LIMIT_PATTERN; otherwise returns None.
    """
    # Anything that is not a string, or lacks the marker text, is not this error.
    if not isinstance(data, str) or GLOBAL_LIMIT_PATTERN not in data:
        return None

    # The text before the first space is logged as the model name
    # (the example message above starts with it).
    model_name = data.partition(' ')[0]
    logger.warning("Model %s is overloaded.", model_name)
    return GLOBAL_LIMIT_ERROR
73+
74+
75+ def check_google_error (data : str ) -> Optional [str ]:
6176 # data = {
6277 # 'error': {
6378 # 'code': 429,
@@ -84,23 +99,9 @@ async def is_google_error(data: str) -> bool:
8499 except Exception as e :
85100 logger .info ("Json.loads error %s" , e )
86101 else :
87- if data ["error" ].get ("status" , "" ) == "RESOURCE_EXHAUSTED" :
88- if config ["openrouter" ]["google_rate_delay" ]:
89- # I think this is global rate limit, so 'retryDelay' is useless
90- # try:
91- # retry_info = next(
92- # (item for item in data['error']['details']
93- # if item.get('@type') == 'type.googleapis.com/google.rpc.RetryInfo'), {}
94- # )
95- # retry_delay = retry_info['retryDelay']
96- # retry_delay_s = int(''.join(c for c in retry_delay if c.isdigit()))
97- # except (TypeError, KeyError, ValueError) as _:
98- # retry_delay_s = GOOGLE_DELAY
99- logger .info ("Google returned RESOURCE_EXHAUSTED, wait %s sec" ,
100- config ["openrouter" ]["google_rate_delay" ])
101- await asyncio .sleep (config ["openrouter" ]["google_rate_delay" ])
102- return True
103- return False
102+ if data .get ("error" , {}).get ("status" , "" ) == "RESOURCE_EXHAUSTED" :
103+ return GOOGLE_LIMIT_ERROR
104+ return None
104105
105106
106107async def check_rate_limit (data : str or bytes ) -> Tuple [bool , Optional [int ]]:
@@ -125,9 +126,13 @@ async def check_rate_limit(data: str or bytes) -> Tuple[bool, Optional[int]]:
125126 try :
126127 x_rate_limit = int (err ["error" ]["metadata" ]["headers" ]["X-RateLimit-Reset" ])
127128 except (TypeError , KeyError ):
128- if (code == RATE_LIMIT_ERROR_CODE and
129- await is_google_error (err ["error" ].get ("metadata" , {}).get ("raw" , "" ))):
130- return False , None
129+ if code == RATE_LIMIT_ERROR_CODE and (raw := err ["error" ].get ("metadata" , {}).get ("raw" , "" )):
130+ issue = check_global_limit (raw ) or check_google_error (raw )
131+ if issue :
132+ if config ["openrouter" ]["global_rate_delay" ]:
133+ logger .info ("%s, waiting %s seconds." , issue , config ["openrouter" ]["global_rate_delay" ])
134+ await asyncio .sleep (config ["openrouter" ]["global_rate_delay" ])
135+ return False , None
131136 x_rate_limit = 0
132137
133138 if x_rate_limit > 0 :
0 commit comments