Skip to content

Commit f6ddfeb

Browse files
AlanPonnachan and LingXuanYin
authored and committed
fix(vertex_ai): Correctly map 429 Resource Exhausted to RateLimitError (BerriAI#16363)
1 parent e35148a commit f6ddfeb

File tree

2 files changed

+68
-8
lines changed

2 files changed

+68
-8
lines changed

litellm/litellm_core_utils/exception_mapping_utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1288,6 +1288,7 @@ def exception_type( # type: ignore # noqa: PLR0915
12881288
elif (
12891289
"429 Quota exceeded" in error_str
12901290
or "Quota exceeded for" in error_str
1291+
or "Resource exhausted" in error_str
12911292
or "IndexError: list index out of range" in error_str
12921293
or "429 Unable to submit request because the service is temporarily out of capacity."
12931294
in error_str
Lines changed: 67 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,91 @@
11
import pytest
2+
import litellm
23

34
from litellm.litellm_core_utils.exception_mapping_utils import ExceptionCheckers
5+
from litellm.litellm_core_utils.exception_mapping_utils import exception_type
46

57
# Test cases for is_error_str_context_window_exceeded
# Tuple format: (error_message, expected_result)
context_window_test_cases = [
    # Positive cases (should return True)
    ("An error occurred: The input exceeds the model's maximum context limit of 8192 tokens.", True),
    ("Some text before, this model's maximum context length is 4096 tokens. Some text after.", True),
    ("Validation Error: string too long. expected a string with maximum length 1000.", True),
    ("Your prompt is longer than the model's context length of 2048.", True),
    ("AWS Bedrock Error: The request payload size has exceed context limit.", True),
    ("Input tokens exceed the configured limit of 272000 tokens. Your messages resulted in 509178 tokens. Please reduce the length of the messages.", True),
    # Matching must be case-insensitive
    ("ERROR: THIS MODEL'S MAXIMUM CONTEXT LENGTH IS 1024.", True),
    # Negative cases (should return False)
    ("A generic API error occurred.", False),
    ("Invalid API Key provided.", False),
    ("Rate limit reached for requests.", False),
    ("The context is large, but acceptable.", False),
    ("", False),  # Empty string
]
2638

39+
2740
@pytest.mark.parametrize("error_str, expected", context_window_test_cases)
def test_is_error_str_context_window_exceeded(error_str, expected):
    """Verify is_error_str_context_window_exceeded classifies each sample message.

    Each parametrized case supplies an error string and the expected boolean
    classification (True = context-window-exceeded error).
    """
    result = ExceptionCheckers.is_error_str_context_window_exceeded(error_str)
    assert result == expected
46+
47+
48+
# Test cases for Vertex AI RateLimitError mapping
# As per https://github.com/BerriAI/litellm/issues/16189
# Tuple format: (error_message, should_raise_rate_limit)
vertex_rate_limit_test_cases = [
    ("429 Quota exceeded for model", True),
    ("Resource exhausted. Please try again later.", True),
    ("429 Unable to submit request because the service is temporarily out of capacity.", True),
    # Negative case: must NOT be mapped to RateLimitError
    ("A generic error occurred.", False),
]
59+
60+
61+
@pytest.mark.parametrize(
    "error_message, should_raise_rate_limit", vertex_rate_limit_test_cases
)
def test_vertex_ai_rate_limit_error_mapping(error_message, should_raise_rate_limit):
    """Verify exception_type maps Vertex AI "Resource exhausted" errors correctly.

    Messages flagged as rate-limit errors must surface as litellm.RateLimitError;
    any other message falls through to the generic litellm.APIConnectionError.
    """
    # Wrap the raw provider message the way exception_type receives it.
    source_error = Exception(error_message)

    # Pick the exception class we expect exception_type to raise for this case.
    expected_exception = (
        litellm.RateLimitError
        if should_raise_rate_limit
        else litellm.APIConnectionError
    )

    with pytest.raises(expected_exception):
        exception_type(
            model="gemini/gemini-2.5-flash",
            original_exception=source_error,
            custom_llm_provider="vertex_ai",
        )

0 commit comments

Comments (0)