179 commits
f0cf772
fix: lazy load cost_calculator.py
AlexsanderHamir Nov 18, 2025
216b08d
fix: lazy-load Prometheus
AlexsanderHamir Nov 18, 2025
e8a6a07
fix: lazy load litellm_logging
AlexsanderHamir Nov 19, 2025
fa55864
fix: lazy load utils.py imports
AlexsanderHamir Nov 19, 2025
b3b8612
fix: lazy load tiktoken and default_encoding imports
AlexsanderHamir Nov 19, 2025
13128a3
refactor: add helper functions for cached lazy imports
AlexsanderHamir Nov 19, 2025
55ca1f1
feat: lazy load HTTP handlers to reduce import-time memory cost
AlexsanderHamir Nov 19, 2025
6a8b4b6
fix: lazy load caching classes to reduce import-time memory cost
AlexsanderHamir Nov 19, 2025
726bb49
refactor: make lazy imports cleaner
AlexsanderHamir Nov 21, 2025
505c598
fix: lazy load LLMClientCache
AlexsanderHamir Nov 21, 2025
98fc291
Merge remote-tracking branch 'origin/main' into litellm_memory_import…
AlexsanderHamir Nov 22, 2025
da97d2c
fix: lazy load COHERE_EMBEDDING_INPUT_TYPES, GuardrailItem, and remov…
AlexsanderHamir Nov 22, 2025
efcc634
Lazy load litellm.types.utils imports to reduce import-time memory cost
AlexsanderHamir Nov 22, 2025
f8b80bc
Lazy load provider_list and priority_reservation_settings
AlexsanderHamir Nov 22, 2025
44df16e
Lazy load types.secret_managers.main imports
AlexsanderHamir Nov 22, 2025
b03746b
Delay client import to reduce early import memory usage
AlexsanderHamir Nov 22, 2025
f6d9136
Lazy load BytezChatConfig to reduce import-time memory usage
AlexsanderHamir Nov 22, 2025
3f4fce4
Lazy load CustomLLM to reduce import-time memory usage
AlexsanderHamir Nov 22, 2025
eb4ed12
Lazy load AmazonConverseConfig to reduce import-time memory usage
AlexsanderHamir Nov 22, 2025
05e1b9b
Lazy load OpenAILikeChatConfig to reduce import-time memory usage
AlexsanderHamir Nov 22, 2025
50cd4dd
Lazy load AiohttpOpenAIChatConfig to reduce import-time memory usage
AlexsanderHamir Nov 22, 2025
d9b8d04
Lazy load GaladrielChatConfig to reduce import-time memory usage
AlexsanderHamir Nov 22, 2025
8ef0fbd
Lazy load GithubChatConfig, CompactifAIChatConfig, and EmpowerChatConfig
AlexsanderHamir Nov 22, 2025
1894bdb
Lazy load HuggingFaceChatConfig, OpenrouterConfig, AnthropicConfig, a…
AlexsanderHamir Nov 22, 2025
d8a8f8b
Lazy load PredibaseConfig, ReplicateConfig, and SnowflakeConfig
AlexsanderHamir Nov 22, 2025
19f6e4e
Remove duplicate DatabricksConfig import
AlexsanderHamir Nov 22, 2025
83a7823
Lazy load HuggingFaceEmbeddingConfig to reduce import-time memory usage
AlexsanderHamir Nov 22, 2025
7e678df
Lazy load 28 additional config classes to reduce import-time memory u…
AlexsanderHamir Nov 22, 2025
50f80c5
Lazy load 10 rerank config classes to reduce import-time memory usage
AlexsanderHamir Nov 22, 2025
557d218
Add rerank configs to TYPE_CHECKING block
AlexsanderHamir Nov 22, 2025
c10fde8
Lazy load 10 more config classes (vertex, bedrock, anthropic, togethe…
AlexsanderHamir Nov 22, 2025
6b44a4f
Lazy load 6 more bedrock config classes to reduce import-time memory …
AlexsanderHamir Nov 22, 2025
af1b943
Lazy load AnthropicModelInfo to reduce import-time memory usage
AlexsanderHamir Nov 22, 2025
53a28b3
Add lazy loading handler for AnthropicModelInfo
AlexsanderHamir Nov 22, 2025
8c44ef7
Lazy load AI21Config alias to reduce import-time memory usage
AlexsanderHamir Nov 22, 2025
e57a297
Add lazy loading handler for AI21Config alias
AlexsanderHamir Nov 22, 2025
98d09fd
Lazy load PalmConfig (deprecated provider) to reduce import-time memo…
AlexsanderHamir Nov 22, 2025
67ad9ac
Add lazy loading handler for PalmConfig
AlexsanderHamir Nov 22, 2025
6333476
Add lazy loading handler for PalmConfig (fix)
AlexsanderHamir Nov 22, 2025
6897bd2
Lazy load all deprecated provider configs to reduce import-time memor…
AlexsanderHamir Nov 22, 2025
5f946d7
Add lazy loading handler for AlephAlphaConfig
AlexsanderHamir Nov 22, 2025
60c42e5
Add lazy loading handler for AlephAlphaConfig (fix)
AlexsanderHamir Nov 22, 2025
f847937
Lazy load bedrock_tool_name_mappings to reduce import-time memory usage
AlexsanderHamir Nov 22, 2025
ff8cc60
Add lazy loading handler for bedrock_tool_name_mappings
AlexsanderHamir Nov 22, 2025
8d8d6a7
Lazy load AmazonInvokeConfig to reduce import-time memory usage
AlexsanderHamir Nov 22, 2025
8f821b9
Add AmazonInvokeConfig to TYPE_CHECKING block
AlexsanderHamir Nov 22, 2025
7b4b7f1
Lazy load MistralEmbeddingConfig to reduce import-time memory usage
AlexsanderHamir Nov 22, 2025
a1c6b40
Add MistralEmbeddingConfig to TYPE_CHECKING block
AlexsanderHamir Nov 22, 2025
a0fc114
Lazy load OpenAITextCompletionConfig to reduce import-time memory usage
AlexsanderHamir Nov 22, 2025
deb0d9f
Add lazy loading handler and TYPE_CHECKING for OpenAITextCompletionCo…
AlexsanderHamir Nov 22, 2025
5597104
Add lazy loading handler for OpenAITextCompletionConfig
AlexsanderHamir Nov 22, 2025
b77d130
Lazy load VoyageContextualEmbeddingConfig to reduce import-time memor…
AlexsanderHamir Nov 22, 2025
b5652c3
Add lazy loading handler and TYPE_CHECKING for VoyageContextualEmbedd…
AlexsanderHamir Nov 22, 2025
61a05a8
Add lazy loading handler for VoyageContextualEmbeddingConfig
AlexsanderHamir Nov 22, 2025
f04c4cd
Lazy load AzureOpenAIResponsesAPIConfig to reduce import-time memory …
AlexsanderHamir Nov 22, 2025
4863404
Lazy load AzureOpenAIOSeriesResponsesAPIConfig to reduce import-time …
AlexsanderHamir Nov 22, 2025
83de911
Add lazy loading handler and TYPE_CHECKING for AzureOpenAIOSeriesResp…
AlexsanderHamir Nov 22, 2025
f70f6d9
Add lazy loading handler for AzureOpenAIOSeriesResponsesAPIConfig
AlexsanderHamir Nov 22, 2025
929d47a
Lazy load OpenAIOSeriesConfig, OpenAIO1Config, and openaiOSeriesConfi…
AlexsanderHamir Nov 22, 2025
a606086
Add lazy loading handlers and TYPE_CHECKING for OpenAIOSeriesConfig
AlexsanderHamir Nov 22, 2025
08a123d
Add lazy loading handlers for OpenAIOSeriesConfig, OpenAIO1Config, an…
AlexsanderHamir Nov 22, 2025
1e9785b
Lazy load AzureOpenAIO1Config to reduce import-time memory usage
AlexsanderHamir Nov 22, 2025
2d934ed
Add lazy loading handler for AzureOpenAIO1Config
AlexsanderHamir Nov 22, 2025
4bc699f
Lazy load GradientAIConfig to reduce import-time memory usage
AlexsanderHamir Nov 22, 2025
ff198cf
Add lazy loading handler and TYPE_CHECKING for GradientAIConfig
AlexsanderHamir Nov 22, 2025
ee64008
Add lazy loading handler for GradientAIConfig
AlexsanderHamir Nov 22, 2025
2ade09a
Lazy load OpenAIGPTConfig and openAIGPTConfig to reduce import-time m…
AlexsanderHamir Nov 22, 2025
2cab36a
Add lazy loading handlers and TYPE_CHECKING for OpenAIGPTConfig
AlexsanderHamir Nov 22, 2025
2d70bfc
Add lazy loading handlers for OpenAIGPTConfig and openAIGPTConfig
AlexsanderHamir Nov 22, 2025
5fbc62a
Lazy load OpenAIGPT5Config and openAIGPT5Config to reduce import-time…
AlexsanderHamir Nov 22, 2025
303ee35
Add lazy loading handlers and TYPE_CHECKING for OpenAIGPT5Config
AlexsanderHamir Nov 22, 2025
3b50af0
Add lazy loading handlers for OpenAIGPT5Config and openAIGPT5Config
AlexsanderHamir Nov 22, 2025
f3dfa46
Lazy load OpenAIGPTAudioConfig and openAIGPTAudioConfig to reduce imp…
AlexsanderHamir Nov 22, 2025
f80de69
Add OpenAIGPTAudioConfig to TYPE_CHECKING block
AlexsanderHamir Nov 22, 2025
02d8391
Lazy load NvidiaNimConfig and nvidiaNimConfig to reduce import-time m…
AlexsanderHamir Nov 22, 2025
6b5899c
Add lazy loading handlers and TYPE_CHECKING for NvidiaNimConfig
AlexsanderHamir Nov 22, 2025
56388cd
Add lazy loading handlers for NvidiaNimConfig and nvidiaNimConfig
AlexsanderHamir Nov 22, 2025
838400e
Refactor dotprompt lazy loading into separate function
AlexsanderHamir Nov 22, 2025
6800220
Refactor logging integrations lazy loading into separate function
AlexsanderHamir Nov 22, 2025
0891eb4
Refactor type items lazy loading into separate function
AlexsanderHamir Nov 22, 2025
02ea75c
Refactor core helpers and OpenAI-like configs lazy loading into separ…
AlexsanderHamir Nov 22, 2025
34471cd
Refactor small provider chat configs lazy loading into separate function
AlexsanderHamir Nov 22, 2025
117f657
Refactor data platform configs lazy loading into separate function
AlexsanderHamir Nov 22, 2025
9b70fa7
Refactor HuggingFace configs lazy loading into separate function
AlexsanderHamir Nov 22, 2025
7ec1448
Refactor Anthropic configs lazy loading into separate function
AlexsanderHamir Nov 22, 2025
3da8a8f
Refactor Triton configs lazy loading into separate function
AlexsanderHamir Nov 22, 2025
1003c67
Refactor AI21 configs lazy loading into separate function
AlexsanderHamir Nov 22, 2025
f59b831
Refactor Ollama configs lazy loading into separate function
AlexsanderHamir Nov 22, 2025
8d05cc4
Refactor Sagemaker configs lazy loading into separate function
AlexsanderHamir Nov 22, 2025
a9b3089
Refactor Cohere chat configs lazy loading into separate function
AlexsanderHamir Nov 22, 2025
2adfea3
Refactor rerank configs lazy loading into separate function
AlexsanderHamir Nov 22, 2025
cfcd7af
Refactor Vertex AI configs lazy loading into separate function
AlexsanderHamir Nov 22, 2025
72b2ab6
Refactor Amazon Bedrock configs lazy loading into separate function
AlexsanderHamir Nov 22, 2025
1e34287
Refactor deprecated provider configs lazy loading into separate function
AlexsanderHamir Nov 22, 2025
88bc7f0
Refactor Azure Responses API configs lazy loading into separate function
AlexsanderHamir Nov 22, 2025
6997fe8
Refactor OpenAI O-Series configs lazy loading into separate function
AlexsanderHamir Nov 22, 2025
851646d
Refactor OpenAI GPT configs lazy loading into separate function
AlexsanderHamir Nov 22, 2025
a0090cd
Refactor NvidiaNim configs lazy loading into separate function
AlexsanderHamir Nov 22, 2025
21b62f4
Refactor miscellaneous transformation configs lazy loading into separ…
AlexsanderHamir Nov 22, 2025
e98c236
Move lazy import helper to separate file with fully lazy loading
AlexsanderHamir Nov 23, 2025
85306d4
Move _lazy_import_litellm_logging to separate file
AlexsanderHamir Nov 23, 2025
b9cace7
Move _lazy_import_utils to separate file
AlexsanderHamir Nov 23, 2025
bc3d487
Move _lazy_import_http_handlers to separate file
AlexsanderHamir Nov 23, 2025
ed1eee3
Move _lazy_import_caching to separate file
AlexsanderHamir Nov 23, 2025
9875905
Move _lazy_import_types_utils to separate file
AlexsanderHamir Nov 23, 2025
87b8b15
Move _lazy_import_ui_sso to separate file
AlexsanderHamir Nov 23, 2025
2dd9532
Move _lazy_import_secret_managers to separate file
AlexsanderHamir Nov 23, 2025
f63cd5f
Move _lazy_import_logging_integrations to separate file
AlexsanderHamir Nov 23, 2025
8c89ff5
Move _lazy_import_nvidia_nim_configs to separate file
AlexsanderHamir Nov 23, 2025
1385253
Move remaining lazy import helper functions to separate file
AlexsanderHamir Nov 23, 2025
6090e0b
Update all lazy import functions to use _get_litellm_globals()
AlexsanderHamir Nov 23, 2025
15c42c4
Lazy load CerebrasConfig
AlexsanderHamir Nov 23, 2025
fdba04a
Lazy load BasetenConfig, SambanovaConfig, FireworksAIConfig, SambaNov…
AlexsanderHamir Nov 23, 2025
d6be221
Lazy load FriendliaiChatConfig, XAIChatConfig, AIMLChatConfig, VolcEn…
AlexsanderHamir Nov 23, 2025
f943f82
Lazy load Azure OpenAI configs, AzureOpenAIError, HerokuChatConfig, a…
AlexsanderHamir Nov 23, 2025
284fb9e
Lazy load HostedVLLM, Llamafile, LiteLLMProxy, DeepSeek, LMStudio, Ns…
AlexsanderHamir Nov 23, 2025
cbd7ebb
Lazy load Nebius, Wandb, DashScope, Moonshot, DockerModelRunner, V0, …
AlexsanderHamir Nov 23, 2025
a83c805
Lazy load BaseFilesConfig, AllowedModelRegion, and KeyManagementSyste…
AlexsanderHamir Nov 23, 2025
540be15
Add lazy import helper for main module functions
AlexsanderHamir Nov 23, 2025
c1a8997
Remove from .main import * and add essential direct imports
AlexsanderHamir Nov 23, 2025
00041c9
Add lazy loading handler for main module functions in __getattr__
AlexsanderHamir Nov 23, 2025
b42e0ee
optimize lazy load fallback
AlexsanderHamir Nov 24, 2025
cdbd78e
Lazy load anthropic_tokenizer.json to reduce import-time memory cost
AlexsanderHamir Nov 24, 2025
b458774
Optimize lazy loading for get_llm_provider and fix circular imports
AlexsanderHamir Nov 24, 2025
047cbf4
Lazy load model_cost to reduce import-time memory cost
AlexsanderHamir Nov 24, 2025
d054657
Lazy load batches.main to reduce import-time memory cost
AlexsanderHamir Nov 24, 2025
62141b3
Lazy load DatadogLLMObsInitParams and DatadogInitParams to reduce imp…
AlexsanderHamir Nov 24, 2025
dcbd8e0
Lazy load TritonGenerateConfig and TritonInferConfig to reduce import…
AlexsanderHamir Nov 24, 2025
ef6a16f
Lazy load GeminiModelInfo to reduce import-time memory cost
AlexsanderHamir Nov 24, 2025
d2bd71d
Lazy load assistants.main to reduce import-time memory cost
AlexsanderHamir Nov 24, 2025
668e4a5
Lazy load OpenAIImageVariationConfig to reduce import-time memory cost
AlexsanderHamir Nov 24, 2025
fa00a74
Lazy load DeepgramAudioTranscriptionConfig to reduce import-time memo…
AlexsanderHamir Nov 24, 2025
8d92e83
Lazy load TopazModelInfo to reduce import-time memory cost
AlexsanderHamir Nov 24, 2025
8face9c
Lazy load TopazImageVariationConfig to reduce import-time memory cost
AlexsanderHamir Nov 24, 2025
7fa8555
Lazy load OpenAIResponsesAPIConfig to reduce import-time memory cost
AlexsanderHamir Nov 24, 2025
65aec69
Fix circular import between custom_logger and custom_batch_logger
AlexsanderHamir Nov 24, 2025
ab32d1d
Lazy load LlmProviders and PriorityReservationSettings, fix circular …
AlexsanderHamir Nov 24, 2025
32e44e6
refactor: lazy load async client cleanup registration to reduce impor…
AlexsanderHamir Nov 24, 2025
6033919
refactor: lazy load timeout decorator to reduce import-time memory cost
AlexsanderHamir Nov 24, 2025
1982f4e
refactor: lazy load VertexAITextEmbeddingConfig to reduce import-time…
AlexsanderHamir Nov 24, 2025
8f032f5
refactor: lazy load TwelveLabsMarengoEmbeddingConfig to reduce import…
AlexsanderHamir Nov 24, 2025
c31d706
refactor: lazy load NvidiaNimEmbeddingConfig to reduce import-time me…
AlexsanderHamir Nov 24, 2025
e095fd6
refactor: lazy load KeyManagementSettings to reduce import-time memor…
AlexsanderHamir Nov 24, 2025
699da8c
refactor: lazy load httpx to reduce import-time memory cost
AlexsanderHamir Nov 24, 2025
54f3d21
refactor: lazy load PromptSpec to reduce import-time memory cost
AlexsanderHamir Nov 24, 2025
9bd0a4e
refactor: lazy load Router to reduce import-time memory cost
AlexsanderHamir Nov 24, 2025
9a429af
refactor: lazy load images.main and fix circular import
AlexsanderHamir Nov 24, 2025
31964f6
refactor: lazy load videos.main to reduce import-time memory cost
AlexsanderHamir Nov 24, 2025
47d99a3
refactor: lazy load rerank_api.main to reduce import-time memory cost
AlexsanderHamir Nov 24, 2025
a7dda99
refactor: lazy load anthropic experimental, responses, and containers…
AlexsanderHamir Nov 24, 2025
e13e688
refactor: lazy load OCR, search, realtime, and fine-tuning modules
AlexsanderHamir Nov 24, 2025
e93939b
refactor: lazy load anthropic_interface module
AlexsanderHamir Nov 24, 2025
e8599d4
refactor: lazy load vector stores, passthrough, and google_genai in _…
AlexsanderHamir Nov 24, 2025
7c36b71
refactor: lazy load all LLM handlers and related imports in main.py
AlexsanderHamir Nov 24, 2025
fd1258d
fix: add CreateFileRequest to lazy loading in __init__
AlexsanderHamir Nov 24, 2025
0b3df8c
fix: add azure_chat_completions to __getattr__ in main.py
AlexsanderHamir Nov 24, 2025
271a114
refactor: lazy load openai in @client decorator to reduce import-time…
AlexsanderHamir Nov 24, 2025
423327e
refactor: remove unused _service_logger import from utils.py
AlexsanderHamir Nov 24, 2025
44ee508
refactor: lazy load audio_utils.utils to reduce import-time memory
AlexsanderHamir Nov 24, 2025
66d4a0b
refactor: remove unused litellm.llms imports to reduce import-time me…
AlexsanderHamir Nov 24, 2025
78c2a8f
refactor: lazy load CachingHandlerResponse and LLMCachingHandler
AlexsanderHamir Nov 24, 2025
1a016ef
refactor: lazy load CustomGuardrail to reduce import-time memory
AlexsanderHamir Nov 24, 2025
f547c25
refactor: lazy load CustomLogger to reduce import-time memory
AlexsanderHamir Nov 24, 2025
fee857c
fix: use lazy loader for LLMCachingHandler in async wrapper
AlexsanderHamir Nov 24, 2025
be109b0
refactor: remove unused BaseVectorStore import from utils.py
AlexsanderHamir Nov 24, 2025
90921bf
refactor: lazy load get_litellm_metadata_from_kwargs to reduce import…
AlexsanderHamir Nov 24, 2025
1452d93
refactor: lazy load CredentialAccessor to reduce import-time memory
AlexsanderHamir Nov 24, 2025
8a7f4de
refactor: lazy load exception_mapping_utils functions to reduce impor…
AlexsanderHamir Nov 24, 2025
72c7d17
fix: lazy load exception_type in main.py to fix import error
AlexsanderHamir Nov 24, 2025
dfbfb47
refactor: lazy load get_llm_provider to reduce import-time memory
AlexsanderHamir Nov 24, 2025
c82a377
fix: lazy load get_llm_provider in main.py to fix import error
AlexsanderHamir Nov 24, 2025
674eccc
refactor: lazy load get_supported_openai_params to reduce import-time…
AlexsanderHamir Nov 24, 2025
7bb735f
refactor: lazy load convert_dict_to_response functions to reduce impo…
AlexsanderHamir Nov 24, 2025
1503b0d
refactor: lazy load get_api_base to reduce import-time memory
AlexsanderHamir Nov 24, 2025
ea622ce
refactor: lazy load llm_response_utils and redact_messages functions …
AlexsanderHamir Nov 24, 2025
2b40b56
fix: move TYPE_CHECKING block after typing import to fix NameError
AlexsanderHamir Nov 24, 2025
68d0902
refactor: lazy load CustomStreamWrapper to reduce import-time memory
AlexsanderHamir Nov 24, 2025
90ceb20
refactor: lazy load BaseGoogleGenAIGenerateContentConfig to reduce im…
AlexsanderHamir Nov 24, 2025
ea248c8
refactor: lazy load BaseOCRConfig to reduce import-time memory
AlexsanderHamir Nov 24, 2025
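Most of the commits above apply the same two mechanics in `litellm/__init__.py`: symbols move into a `TYPE_CHECKING` block so type checkers still see them, and a module-level `__getattr__` (PEP 562) resolves and caches them on first access. A minimal, self-contained sketch of that pattern — `decimal.Decimal` stands in for a heavy config class, and the throwaway `lazy_demo` module stands in for `litellm/__init__.py` (the real name-to-module table lives in `litellm/_lazy_imports.py`):

```python
import importlib
import sys
import types
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Type checkers see the real symbol; nothing is imported at runtime.
    from decimal import Decimal

# In litellm this code lives in the package __init__.py; here we build a
# throwaway module object so the sketch is runnable anywhere.
mod = types.ModuleType("lazy_demo")

# name -> (module, attribute); the real table maps config classes to modules
mod._LAZY_ATTRS = {"Decimal": ("decimal", "Decimal")}

def _module_getattr(name):
    """PEP 562 __getattr__: runs only when `name` is not in the module dict."""
    try:
        module_name, attr = mod._LAZY_ATTRS[name]
    except KeyError:
        raise AttributeError(name)
    value = getattr(importlib.import_module(module_name), attr)
    setattr(mod, name, value)  # cache: later accesses bypass __getattr__
    return value

mod.__getattr__ = _module_getattr  # PEP 562 honors this on module objects
sys.modules["lazy_demo"] = mod

# First attribute access triggers the import; until then decimal stays unloaded.
print(mod.Decimal("1.5"))  # -> 1.5
```

After the first access the resolved class sits in the module dict, so subsequent lookups never hit `__getattr__` again — which is why the "cached lazy imports" helper commits matter for hot paths.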
1,563 changes: 1,118 additions & 445 deletions litellm/__init__.py

Large diffs are not rendered by default.

1,412 changes: 1,412 additions & 0 deletions litellm/_lazy_imports.py

Large diffs are not rendered by default.

11 changes: 9 additions & 2 deletions litellm/images/main.py
@@ -6,11 +6,16 @@
 import httpx
 
 import litellm
-from litellm import Logging, client, exception_type, get_litellm_params
+from litellm.utils import exception_type, get_litellm_params
+# client is imported from litellm as it's a decorator
+from litellm import client
 from litellm.constants import DEFAULT_IMAGE_ENDPOINT_MODEL
 from litellm.constants import request_timeout as DEFAULT_REQUEST_TIMEOUT
 from litellm.exceptions import LiteLLMUnknownProvider
-from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+# Logging is imported lazily when needed to avoid loading litellm_logging at import time
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from litellm.litellm_core_utils.litellm_logging import Logging, Logging as LiteLLMLoggingObj
 from litellm.litellm_core_utils.mock_functions import mock_image_generation
 from litellm.llms.base_llm import BaseImageEditConfig, BaseImageGenerationConfig
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
@@ -263,6 +268,8 @@ def image_generation(  # noqa: PLR0915
 
     litellm_params_dict = get_litellm_params(**kwargs)
 
+    # Import Logging lazily only when needed
+    from litellm.litellm_core_utils.litellm_logging import Logging
     logging: Logging = litellm_logging_obj
     logging.update_environment_variables(
         model=model,
2 changes: 2 additions & 0 deletions litellm/integrations/custom_batch_logger.py
@@ -10,6 +10,8 @@
 
 import litellm
 from litellm._logging import verbose_logger
+# Import CustomLogger lazily to break circular dependency:
+# custom_logger -> caching.caching -> gcs_cache -> gcs_bucket_base -> custom_batch_logger -> custom_logger
 from litellm.integrations.custom_logger import CustomLogger
 
 
9 changes: 7 additions & 2 deletions litellm/integrations/custom_logger.py
@@ -16,7 +16,12 @@
 from pydantic import BaseModel
 
 from litellm._logging import verbose_logger
-from litellm.caching.caching import DualCache
+# Lazy import DualCache to break circular dependency:
+# custom_logger -> caching.caching -> gcs_cache -> gcs_bucket_base -> custom_batch_logger -> custom_logger
+if TYPE_CHECKING:
+    from litellm.caching.caching import DualCache
+else:
+    DualCache = Any  # Will be imported lazily when needed
 from litellm.constants import DEFAULT_MAX_RECURSE_DEPTH_SENSITIVE_DATA_MASKER
 from litellm.types.integrations.argilla import ArgillaItem
 from litellm.types.llms.openai import AllMessageValues, ChatCompletionRequest
@@ -289,7 +294,7 @@ async def async_dataset_hook(
     async def async_pre_call_hook(
         self,
         user_api_key_dict: UserAPIKeyAuth,
-        cache: DualCache,
+        cache: "DualCache",  # Use string annotation to avoid import at module level
         data: dict,
         call_type: CallTypesLiteral,
     ) -> Optional[
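The `custom_logger.py` change above is the standard recipe for breaking an import cycle: the real import runs only under `TYPE_CHECKING`, a runtime placeholder keeps the name resolvable, and the quoted annotation is never evaluated at import time. A self-contained sketch of the same recipe — `DualCache` here is only the placeholder name, so no litellm import is needed at runtime:

```python
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
    # Only type checkers execute this branch, so the runtime cycle
    # custom_logger -> caching -> ... -> custom_logger never forms.
    from litellm.caching.caching import DualCache
else:
    DualCache = Any  # runtime placeholder; real class imported where needed

class CustomLogger:
    # The quoted annotation stays a plain string at runtime,
    # so it never forces the import either.
    async def async_pre_call_hook(self, cache: "DualCache", data: dict) -> dict:
        return data

print(CustomLogger.async_pre_call_hook.__annotations__["cache"])  # -> DualCache
```

Callers that actually need the cache object import it inside the function body, paying the cost only on first use.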
10 changes: 9 additions & 1 deletion litellm/integrations/prometheus.py
@@ -24,7 +24,6 @@
 from litellm.types.integrations.prometheus import *
 from litellm.types.integrations.prometheus import _sanitize_prometheus_label_name
 from litellm.types.utils import StandardLoggingPayload
-from litellm.utils import get_end_user_id_for_cost_tracking
 
 if TYPE_CHECKING:
     from apscheduler.schedulers.asyncio import AsyncIOScheduler
@@ -778,6 +777,9 @@ async def async_log_success_event(self, kwargs, response_obj, start_time, end_ti
         model = kwargs.get("model", "")
         litellm_params = kwargs.get("litellm_params", {}) or {}
         _metadata = litellm_params.get("metadata", {})
+        # Lazy import to avoid loading utils.py at import time (60MB saved)
+        from litellm.utils import get_end_user_id_for_cost_tracking
+
         end_user_id = get_end_user_id_for_cost_tracking(
             litellm_params, service_type="prometheus"
         )
@@ -1164,6 +1166,9 @@ async def async_log_failure_event(self, kwargs, response_obj, start_time, end_ti
             "standard_logging_object", {}
         )
         litellm_params = kwargs.get("litellm_params", {}) or {}
+        # Lazy import to avoid loading utils.py at import time (60MB saved)
+        from litellm.utils import get_end_user_id_for_cost_tracking
+
         end_user_id = get_end_user_id_for_cost_tracking(
             litellm_params, service_type="prometheus"
         )
@@ -2249,6 +2254,9 @@ def prometheus_label_factory(
     }
 
     if UserAPIKeyLabelNames.END_USER.value in filtered_labels:
+        # Lazy import to avoid loading utils.py at import time (60MB saved)
+        from litellm.utils import get_end_user_id_for_cost_tracking
+
         filtered_labels["end_user"] = get_end_user_id_for_cost_tracking(
             litellm_params={"user_api_key_end_user_id": enum_values.end_user},
             service_type="prometheus",
4 changes: 3 additions & 1 deletion litellm/litellm_core_utils/litellm_logging.py
@@ -58,7 +58,6 @@
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.integrations.deepeval.deepeval import DeepEvalLogger
 from litellm.integrations.mlflow import MlflowLogger
-from litellm.integrations.prometheus import PrometheusLogger
 from litellm.integrations.sqs import SQSLogger
 from litellm.litellm_core_utils.get_litellm_params import get_litellm_params
 from litellm.litellm_core_utils.llm_cost_calc.tool_call_cost_tracking import (
@@ -3457,6 +3456,9 @@ def _init_custom_logger_compatible_class(  # noqa: PLR0915
         _in_memory_loggers.append(_literalai_logger)
         return _literalai_logger  # type: ignore
     elif logging_integration == "prometheus":
+        # Lazy import to avoid loading prometheus.py and utils.py at import time (60MB saved)
+        from litellm.integrations.prometheus import PrometheusLogger
+
         for callback in _in_memory_loggers:
             if isinstance(callback, PrometheusLogger):
                 return callback  # type: ignore
8 changes: 6 additions & 2 deletions litellm/litellm_core_utils/token_counter.py
@@ -15,7 +15,7 @@
     cast,
 )
 
-import tiktoken
+# tiktoken is imported lazily when needed to avoid loading it at import time
 
 import litellm
 from litellm import verbose_logger
@@ -28,7 +28,7 @@
     MAX_TILE_HEIGHT,
     MAX_TILE_WIDTH,
 )
-from litellm.litellm_core_utils.default_encoding import encoding as default_encoding
+# default_encoding is imported lazily when needed to avoid loading tiktoken at import time
 from litellm.llms.custom_httpx.http_handler import _get_httpx_client
 from litellm.types.llms.anthropic import (
     AnthropicMessagesToolResultParam,
@@ -532,6 +532,8 @@ def count_tokens(text: str) -> int:
            return len(enc.ids)
 
    elif tokenizer_json["type"] == "openai_tokenizer":
+        # Import tiktoken lazily to avoid loading it at import time
+        import tiktoken
        model_to_use = _fix_model_name(model)  # type: ignore
        try:
            if "gpt-4o" in model_to_use:
@@ -550,6 +552,8 @@
    else:
 
        def count_tokens(text: str) -> int:
+            # Import default_encoding lazily to avoid loading tiktoken at import time
+            from litellm.litellm_core_utils.default_encoding import encoding as default_encoding
            return len(default_encoding.encode(text, disallowed_special=()))
 
    return count_tokens
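The commit messages quantify savings ("60MB saved" for `utils.py`). A rough way to see what a single import costs, using only the standard library — each measurement runs in a fresh interpreter so earlier imports don't mask the cost, numbers vary by platform, and `json` here is just a stand-in for a heavy dependency like `tiktoken`:

```python
import subprocess
import sys

def import_peak_kib(module_name: str) -> float:
    """Peak traced allocation (KiB) for importing `module_name`
    in a fresh interpreter, measured with tracemalloc."""
    code = (
        "import tracemalloc, importlib\n"
        "tracemalloc.start()\n"
        f"importlib.import_module({module_name!r})\n"
        "print(tracemalloc.get_traced_memory()[1])\n"
    )
    out = subprocess.run(
        [sys.executable, "-c", code], capture_output=True, text=True, check=True
    )
    return int(out.stdout.strip()) / 1024

print(f"json: {import_peak_kib('json'):.0f} KiB")
```

For attributing import *time* rather than memory, `python -X importtime -c "import litellm"` gives a per-module breakdown on stderr.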
2 changes: 1 addition & 1 deletion litellm/llms/azure/azure.py
@@ -1020,7 +1020,7 @@ async def aimage_generation(
         headers: dict,
         client=None,
         timeout=None,
-    ) -> litellm.ImageResponse:
+    ) -> ImageResponse:
 
         response: Optional[dict] = None
         try:
4 changes: 2 additions & 2 deletions litellm/llms/azure_ai/embed/handler.py
@@ -58,7 +58,7 @@ async def async_image_embedding(
         data: ImageEmbeddingRequest,
         timeout: float,
         logging_obj,
-        model_response: litellm.EmbeddingResponse,
+        model_response: EmbeddingResponse,
         optional_params: dict,
         api_key: Optional[str],
         api_base: Optional[str],
@@ -138,7 +138,7 @@ async def async_embedding(
         input: List,
         timeout: float,
         logging_obj,
-        model_response: litellm.EmbeddingResponse,
+        model_response: EmbeddingResponse,
         optional_params: dict,
         api_key: Optional[str] = None,
         api_base: Optional[str] = None,
@@ -80,7 +80,7 @@ def transform_response(
         encoding: Any,
         api_key: Optional[str] = None,
         json_mode: Optional[bool] = None,
-    ) -> litellm.ModelResponse:
+    ) -> ModelResponse:
         return AmazonConverseConfig.transform_response(
             self,
             model,
2 changes: 1 addition & 1 deletion litellm/llms/bedrock/image/amazon_titan_transformation.py
@@ -7,7 +7,7 @@
 
 from openai.types.image import Image
 
-from litellm import get_model_info
+from litellm.utils import get_model_info
 from litellm.types.llms.bedrock import (
     AmazonNovaCanvasImageGenerationConfig,
     AmazonTitanImageGenerationRequestBody,
2 changes: 1 addition & 1 deletion litellm/llms/jina_ai/embedding/transformation.py
@@ -11,7 +11,7 @@
 
 import httpx
 
-from litellm import LlmProviders
+from litellm.types.utils import LlmProviders
 from litellm.secret_managers.main import get_secret_str
 from litellm.llms.base_llm.chat.transformation import BaseLLMException
 from litellm.llms.base_llm import BaseEmbeddingConfig
2 changes: 1 addition & 1 deletion litellm/llms/openai/openai.py
@@ -28,8 +28,8 @@
 from typing_extensions import overload
 
 import litellm
-from litellm import LlmProviders
 from litellm._logging import verbose_logger
+from litellm.types.utils import LlmProviders
 from litellm.constants import DEFAULT_MAX_RETRIES
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
 from litellm.litellm_core_utils.logging_utils import track_llm_api_timing
2 changes: 1 addition & 1 deletion litellm/llms/openai_like/chat/handler.py
@@ -10,8 +10,8 @@
 import httpx
 
 import litellm
-from litellm import LlmProviders
 from litellm.llms.bedrock.chat.invoke_handler import MockResponseIterator
+from litellm.types.utils import LlmProviders
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 from litellm.llms.databricks.streaming_utils import ModelResponseIterator
 from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
4 changes: 3 additions & 1 deletion litellm/llms/ovhcloud/chat/transformation.py
@@ -7,7 +7,9 @@
 from typing import Optional, Union, List
 
 import httpx
-from litellm import ModelResponseStream, OpenAIGPTConfig, get_model_info, verbose_logger
+from litellm.utils import ModelResponseStream, get_model_info
+from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
+from litellm._logging import verbose_logger
 from litellm.llms.ovhcloud.utils import OVHCloudException
 from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator
 from litellm.llms.base_llm.chat.transformation import BaseLLMException
3 changes: 2 additions & 1 deletion litellm/llms/together_ai/chat.py
@@ -8,7 +8,8 @@
 
 from typing import Optional
 
-from litellm import get_model_info, verbose_logger
+from litellm.utils import get_model_info
+from litellm._logging import verbose_logger
 
 from ..openai.chat.gpt_transformation import OpenAIGPTConfig
 
Expand Down
3 changes: 2 additions & 1 deletion litellm/llms/vertex_ai/common_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
import httpx

import litellm
from litellm import supports_response_schema, supports_system_messages, verbose_logger
from litellm.utils import supports_response_schema, supports_system_messages
from litellm._logging import verbose_logger
from litellm.constants import DEFAULT_MAX_RECURSE_DEPTH
from litellm.litellm_core_utils.prompt_templates.common_utils import unpack_defs
from litellm.llms.base_llm.base_utils import BaseLLMModelInfo, BaseTokenCounter
Expand Down
2 changes: 1 addition & 1 deletion litellm/llms/vertex_ai/files/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

import httpx

from litellm import LlmProviders
from litellm.integrations.gcs_bucket.gcs_bucket_base import (
GCSBucketBase,
GCSLoggingConfig,
Expand All @@ -17,6 +16,7 @@
OpenAIFileObject,
)
from litellm.types.llms.vertex_ai import VERTEX_CREDENTIALS_TYPES
from litellm.types.utils import LlmProviders

from .transformation import VertexAIJsonlFilesTransformation

Expand Down
4 changes: 2 additions & 2 deletions litellm/llms/vertex_ai/fine_tuning/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
ResponseSupervisedTuningSpec,
ResponseTuningJob,
)
from litellm.types.utils import LiteLLMFineTuningJob
from litellm.types.utils import LiteLLMFineTuningJob, LlmProviders


class VertexFineTuningAPI(VertexLLM):
Expand All @@ -30,7 +30,7 @@ class VertexFineTuningAPI(VertexLLM):
def __init__(self) -> None:
super().__init__()
self.async_handler = get_async_httpx_client(
llm_provider=litellm.LlmProviders.VERTEX_AI,
llm_provider=LlmProviders.VERTEX_AI,
params={"timeout": 600.0},
)

Expand Down
@@ -8,7 +8,7 @@
 import httpx
 
 import litellm
-from litellm import EmbeddingResponse
+from litellm.types.utils import EmbeddingResponse
 from litellm.llms.custom_httpx.http_handler import (
     AsyncHTTPHandler,
     HTTPHandler,
@@ -6,7 +6,7 @@
 
 from typing import List
 
-from litellm import EmbeddingResponse
+from litellm.types.utils import EmbeddingResponse
 from litellm.types.llms.openai import EmbeddingInput
 from litellm.types.llms.vertex_ai import (
     ContentType,
@@ -175,7 +175,7 @@ async def aimage_generation(
         vertex_project: Optional[str],
         vertex_location: Optional[str],
         vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
-        model_response: litellm.ImageResponse,
+        model_response: ImageResponse,
         logging_obj: Any,
         model: str = "imagegeneration",  # vertex ai uses imagegeneration as the default model
         client: Optional[AsyncHTTPHandler] = None,
@@ -147,13 +147,13 @@ async def async_multimodal_embedding(
         optional_params: dict,
         litellm_params: dict,
         data: dict,
-        model_response: litellm.EmbeddingResponse,
+        model_response: EmbeddingResponse,
         timeout: Optional[Union[float, httpx.Timeout]],
         logging_obj: LiteLLMLoggingObj,
         headers={},
         client: Optional[AsyncHTTPHandler] = None,
         api_key: Optional[str] = None,
-    ) -> litellm.EmbeddingResponse:
+    ) -> EmbeddingResponse:
         if client is None:
             _params = {}
             if timeout is not None:
@@ -125,7 +125,7 @@ async def handle_count_tokens_request(
         headers = {"Authorization": f"Bearer {access_token}"}
 
         # Get async HTTP client
-        from litellm import LlmProviders
+        from litellm.types.utils import LlmProviders
 
         async_client = get_async_httpx_client(llm_provider=LlmProviders.VERTEX_AI)
 
2 changes: 1 addition & 1 deletion litellm/llms/vertex_ai/vertex_ai_partner_models/main.py
@@ -6,7 +6,7 @@
 import httpx  # type: ignore
 
 import litellm
-from litellm import LlmProviders
+from litellm.types.utils import LlmProviders
 from litellm.types.llms.vertex_ai import VertexPartnerProvider
 from litellm.utils import ModelResponse
 
4 changes: 2 additions & 2 deletions litellm/llms/vertex_ai/vertex_embeddings/embedding_handler.py
@@ -137,7 +137,7 @@ async def async_embedding(
         self,
         model: str,
         input: Union[list, str],
-        model_response: litellm.EmbeddingResponse,
+        model_response: EmbeddingResponse,
         logging_obj: LiteLLMLoggingObject,
         optional_params: dict,
         custom_llm_provider: Literal[
@@ -152,7 +152,7 @@
         gemini_api_key: Optional[str] = None,
         extra_headers: Optional[dict] = None,
         encoding=None,
-    ) -> litellm.EmbeddingResponse:
+    ) -> EmbeddingResponse:
         """
         Async embedding implementation
         """