Skip to content

Commit f6ddfeb

Browse files
AlanPonnachan and LingXuanYin
authored and committed
fix(vertex_ai): Correctly map 429 Resource Exhausted to RateLimitError (BerriAI#16363)
1 parent e35148a commit f6ddfeb

File tree

2 files changed

+68
-8
lines changed

2 files changed

+68
-8
lines changed

litellm/litellm_core_utils/exception_mapping_utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1288,6 +1288,7 @@ def exception_type( # type: ignore # noqa: PLR0915
12881288
elif (
12891289
"429 Quota exceeded" in error_str
12901290
or "Quota exceeded for" in error_str
1291+
or "Resource exhausted" in error_str
12911292
or "IndexError: list index out of range" in error_str
12921293
or "429 Unable to submit request because the service is temporarily out of capacity."
12931294
in error_str
Lines changed: 67 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,91 @@
11
import pytest
2+
import litellm
23

34
from litellm.litellm_core_utils.exception_mapping_utils import ExceptionCheckers
5+
from litellm.litellm_core_utils.exception_mapping_utils import exception_type
46

57
# Test cases for is_error_str_context_window_exceeded
# Tuple format: (error_message, expected_result)
context_window_test_cases = [
    # Positive cases (should return True)
    ("An error occurred: The input exceeds the model's maximum context limit of 8192 tokens.", True),
    ("Some text before, this model's maximum context length is 4096 tokens. Some text after.", True),
    ("Validation Error: string too long. expected a string with maximum length 1000.", True),
    ("Your prompt is longer than the model's context length of 2048.", True),
    ("AWS Bedrock Error: The request payload size has exceed context limit.", True),
    ("Input tokens exceed the configured limit of 272000 tokens. Your messages resulted in 509178 tokens. Please reduce the length of the messages.", True),
    # Matching must be case-insensitive
    ("ERROR: THIS MODEL'S MAXIMUM CONTEXT LENGTH IS 1024.", True),
    # Negative cases (should return False)
    ("A generic API error occurred.", False),
    ("Invalid API Key provided.", False),
    ("Rate limit reached for requests.", False),
    ("The context is large, but acceptable.", False),
    ("", False),  # Empty string
]
2638

39+
2740
@pytest.mark.parametrize("error_str, expected", context_window_test_cases)
def test_is_error_str_context_window_exceeded(error_str, expected):
    """Verify is_error_str_context_window_exceeded classifies each sample message.

    Each parametrized case supplies an error string and the expected boolean
    classification (True = context-window-exceeded error).
    """
    result = ExceptionCheckers.is_error_str_context_window_exceeded(error_str)
    assert result == expected
46+
47+
48+
# Test cases for Vertex AI RateLimitError mapping
# As per https://github.com/BerriAI/litellm/issues/16189
# Tuple format: (error_message, should_raise_rate_limit)
vertex_rate_limit_test_cases = [
    ("429 Quota exceeded for model", True),
    ("Resource exhausted. Please try again later.", True),
    ("429 Unable to submit request because the service is temporarily out of capacity.", True),
    # Negative case: must NOT be mapped to RateLimitError
    ("A generic error occurred.", False),
]
59+
60+
61+
@pytest.mark.parametrize(
    "error_message, should_raise_rate_limit", vertex_rate_limit_test_cases
)
def test_vertex_ai_rate_limit_error_mapping(error_message, should_raise_rate_limit):
    """Verify exception_type maps Vertex AI "Resource exhausted" errors correctly.

    Messages flagged as rate-limit errors must surface as litellm.RateLimitError;
    any other message falls through to the generic litellm.APIConnectionError.
    """
    # Wrap the raw provider message the way exception_type receives it.
    source_error = Exception(error_message)

    # Pick the exception class we expect exception_type to raise for this case.
    expected_exception = (
        litellm.RateLimitError
        if should_raise_rate_limit
        else litellm.APIConnectionError
    )

    with pytest.raises(expected_exception):
        exception_type(
            model="gemini/gemini-2.5-flash",
            original_exception=source_error,
            custom_llm_provider="vertex_ai",
        )

0 commit comments

Comments (0)