Skip to content

Commit 0c2c556

Browse files
committed
feat(ratelimit): add global delay for upstream rate limits #1
Introduces a mechanism to handle upstream rate limits from OpenRouter, which can occur when a specific model is overloaded. This generalizes the previous handling that was specific to Google's 'RESOURCE_EXHAUSTED' errors. BREAKING CHANGE: The configuration option openrouter.google_rate_delay has been renamed to openrouter.global_rate_delay to reflect its broader purpose.
1 parent 81f7742 commit 0c2c556

File tree

5 files changed

+50
-36
lines changed

5 files changed

+50
-36
lines changed

README.md

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# OpenRouter Proxy
22

3-
A simple proxy server for OpenRouter API that helps bypass rate limits on free API keys
3+
A simple proxy server for OpenRouter API that helps bypass rate limits on free API keys
44
by rotating through multiple API keys in a round-robin fashion.
55

66
## Features
@@ -64,10 +64,12 @@ openrouter:
6464
# Default time in seconds to temporarily disable a key when its rate limit is reached
6565
rate_limit_cooldown: 14400 # 4 hours
6666
free_only: false # try to show only free models
67-
# Google sometimes returns 429 RESOURCE_EXHAUSTED errors repeatedly, which can cause Roo Code to stop.
68-
# This prevents repeated failures by introducing a delay before retrying.
69-
# google_rate_delay: 10 # in sec
70-
google_rate_delay: 0
67+
# OpenRouter can return a 429 error if a model is overloaded.
68+
# Additionally, Google sometimes returns 429 RESOURCE_EXHAUSTED errors repeatedly,
69+
# which can cause Roo Code to stop.
70+
# This option prevents repeated failures by introducing a delay before retrying.
71+
# global_rate_delay: 10 # in seconds
72+
global_rate_delay: 0
7173
7274
# Proxy settings for outgoing requests to OpenRouter
7375
requestProxy:

config.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -121,14 +121,14 @@ def normalize_and_validate_config(config_data: Dict[str, Any]):
121121
)
122122
openrouter_config["free_only"] = default_free_only
123123

124-
default_google_rate_delay = 0
125-
if not isinstance(openrouter_config.get("google_rate_delay"), (int, float)):
124+
default_global_rate_delay = 0
125+
if not isinstance(openrouter_config.get("global_rate_delay"), (int, float)):
126126
logger.warning(
127-
"'openrouter.google_rate_delay' missing or invalid in config.yml. "
127+
"'openrouter.global_rate_delay' missing or invalid in config.yml. "
128128
"Using default: %s",
129-
default_google_rate_delay
129+
default_global_rate_delay
130130
)
131-
openrouter_config["google_rate_delay"] = default_google_rate_delay
131+
openrouter_config["global_rate_delay"] = default_global_rate_delay
132132

133133
# --- Request Proxy Section ---
134134
if not isinstance(config_data.get("requestProxy"), dict):

config.yml.example

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,12 @@ openrouter:
3131
# Default time in seconds to temporarily disable a key when its rate limit is reached
3232
rate_limit_cooldown: 14400 # 4 hours
3333
free_only: false # try to show only free models
34-
# Google sometimes returns 429 RESOURCE_EXHAUSTED errors repeatedly, which can cause Roo Code to stop.
35-
# This prevents repeated failures by introducing a delay before retrying.
36-
# google_rate_delay: 10 # in sec
37-
google_rate_delay: 0
34+
# OpenRouter can return a 429 error if a model is overloaded.
35+
# Additionally, Google sometimes returns 429 RESOURCE_EXHAUSTED errors repeatedly,
36+
# which can cause Roo Code to stop.
37+
# This option prevents repeated failures by introducing a delay before retrying.
38+
# global_rate_delay: 10 # in seconds
39+
global_rate_delay: 0
3840

3941
# Proxy settings for outgoing requests to OpenRouter
4042
requestProxy:

constants.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,8 @@
1010
RATE_LIMIT_ERROR_CODE = 429
1111

1212
MODELS_ENDPOINTS = ["/api/v1/models"]
13+
14+
GLOBAL_LIMIT_PATTERN = "is temporarily rate-limited upstream"
15+
16+
GOOGLE_LIMIT_ERROR = "Google returned RESOURCE_EXHAUSTED code"
17+
GLOBAL_LIMIT_ERROR = "Model is temporarily rate-limited upstream"

utils.py

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from fastapi import Header, HTTPException
1212

1313
from config import config, logger
14-
from constants import RATE_LIMIT_ERROR_CODE
14+
from constants import RATE_LIMIT_ERROR_CODE, GOOGLE_LIMIT_ERROR, GLOBAL_LIMIT_ERROR, GLOBAL_LIMIT_PATTERN
1515

1616

1717
def get_local_ip() -> str:
@@ -57,7 +57,22 @@ async def verify_access_key(
5757
return True
5858

5959

60-
async def is_google_error(data: str) -> bool:
60+
def check_global_limit(data: str) -> Optional[str]:
61+
"""
62+
Checks for a global rate limit error message from OpenRouter.
63+
64+
Example message:
65+
"google/gemini-2.0-flash-exp:free is temporarily rate-limited upstream.
66+
Please retry shortly, or add your own key to accumulate your rate limits:
67+
https://openrouter.ai/settings/integrations"
68+
"""
69+
if isinstance(data, str) and GLOBAL_LIMIT_PATTERN in data:
70+
logger.warning("Model %s is overloaded.", data.split(' ', 1)[0])
71+
return GLOBAL_LIMIT_ERROR
72+
return None
73+
74+
75+
def check_google_error(data: str) -> Optional[str]:
6176
# data = {
6277
# 'error': {
6378
# 'code': 429,
@@ -84,23 +99,9 @@ async def is_google_error(data: str) -> bool:
8499
except Exception as e:
85100
logger.info("Json.loads error %s", e)
86101
else:
87-
if data["error"].get("status", "") == "RESOURCE_EXHAUSTED":
88-
if config["openrouter"]["google_rate_delay"]:
89-
# I think this is global rate limit, so 'retryDelay' is useless
90-
# try:
91-
# retry_info = next(
92-
# (item for item in data['error']['details']
93-
# if item.get('@type') == 'type.googleapis.com/google.rpc.RetryInfo'), {}
94-
# )
95-
# retry_delay = retry_info['retryDelay']
96-
# retry_delay_s = int(''.join(c for c in retry_delay if c.isdigit()))
97-
# except (TypeError, KeyError, ValueError) as _:
98-
# retry_delay_s = GOOGLE_DELAY
99-
logger.info("Google returned RESOURCE_EXHAUSTED, wait %s sec",
100-
config["openrouter"]["google_rate_delay"])
101-
await asyncio.sleep(config["openrouter"]["google_rate_delay"])
102-
return True
103-
return False
102+
if data.get("error", {}).get("status", "") == "RESOURCE_EXHAUSTED":
103+
return GOOGLE_LIMIT_ERROR
104+
return None
104105

105106

106107
async def check_rate_limit(data: str or bytes) -> Tuple[bool, Optional[int]]:
@@ -125,9 +126,13 @@ async def check_rate_limit(data: str or bytes) -> Tuple[bool, Optional[int]]:
125126
try:
126127
x_rate_limit = int(err["error"]["metadata"]["headers"]["X-RateLimit-Reset"])
127128
except (TypeError, KeyError):
128-
if (code == RATE_LIMIT_ERROR_CODE and
129-
await is_google_error(err["error"].get("metadata", {}).get("raw", ""))):
130-
return False, None
129+
if code == RATE_LIMIT_ERROR_CODE and (raw := err["error"].get("metadata", {}).get("raw", "")):
130+
issue = check_global_limit(raw) or check_google_error(raw)
131+
if issue:
132+
if config["openrouter"]["global_rate_delay"]:
133+
logger.info("%s, waiting %s seconds.", issue, config["openrouter"]["global_rate_delay"])
134+
await asyncio.sleep(config["openrouter"]["global_rate_delay"])
135+
return False, None
131136
x_rate_limit = 0
132137

133138
if x_rate_limit > 0:

0 commit comments

Comments
 (0)