153 changes: 99 additions & 54 deletions src/litserve/api.py
@@ -29,6 +29,104 @@


class LitAPI(ABC):
"""Define inference logic for the model.

LitAPI is the core abstraction for serving AI models with LitServe. It provides a clean
interface for model loading, request processing, and response generation with automatic
optimizations like batching, streaming, and async processing.

Core Workflow:
1. **setup()**: Load and initialize the model once per worker
2. **decode_request()**: Convert HTTP request to model input format
3. **predict()**: Run model inference on the input
4. **encode_response()**: Convert model output to HTTP response format

Quick Start:
```python
import litserve as ls

class MyAPI(ls.LitAPI):
    def setup(self, device):
        self.model = lambda x: x**2

    def predict(self, x):
        return self.model(x["input"])

server = ls.LitServer(MyAPI())
server.run()
```
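
As a usage sketch (an assumption, not part of this diff: it presumes the server above is running locally on LitServe's default port 8000 and the default "/predict" path), the endpoint could be called like this:
```python
import requests

# Assumes the MyAPI server above is running locally on the default port/path.
response = requests.post("http://127.0.0.1:8000/predict", json={"input": 4})
print(response.json())
```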

Required Methods:
setup(device): Initialize the model and resources
predict(x): Core inference logic

Optional Methods (sketched in the example below):
decode_request(request): Transform HTTP requests to model input
encode_response(output): Transform model outputs to HTTP responses
batch(inputs)/unbatch(outputs): Custom batching logic
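
As an illustrative sketch (not part of this diff; the toy model and JSON field names are assumptions), an API overriding these optional hooks might look like:
```python
import litserve as ls

class CustomHooksAPI(ls.LitAPI):
    def setup(self, device):
        # Toy stand-in for a real model.
        self.model = lambda batch: [x * 2 for x in batch]

    def decode_request(self, request):
        # Pull the model input out of the HTTP request body.
        return request["input"]

    def batch(self, inputs):
        # Collect individual inputs into one batch for a single forward pass.
        return list(inputs)

    def predict(self, batch):
        return self.model(batch)

    def unbatch(self, outputs):
        # Split the batched output back into per-request results.
        return list(outputs)

    def encode_response(self, output):
        # Shape each result into the JSON payload returned to the client.
        return {"output": output}
```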

Configuration:
max_batch_size: Batch multiple requests for better GPU utilization. Defaults to 1.
batch_timeout: Wait time for batch to fill (seconds). Defaults to 0.0.
stream: Enable streaming responses for real-time output. Defaults to False.
api_path: URL endpoint path. Defaults to "/predict".
enable_async: Enable async/await for non-blocking operations. Defaults to False.
spec: API specification (e.g., OpenAISpec for OpenAI compatibility). Defaults to None.
mcp: Model Context Protocol integration for AI assistants. Defaults to None.

Examples:
Batched GPU Inference:
```python
class BatchedAPI(ls.LitAPI):
    def setup(self, device):
        self.model = load_model().to(device)

    def predict(self, batch):
        return self.model(batch)

api = BatchedAPI(max_batch_size=8, batch_timeout=0.1)
```

Streaming LLM:
```python
class StreamingLLM(ls.LitAPI):
    def setup(self, device):
        self.model = load_llm()

    def predict(self, prompt):
        for token in self.model.generate_stream(prompt):
            yield token

api = StreamingLLM(stream=True)
```
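
When `stream=True`, `encode_response()` typically also yields one encoded chunk per generated item. A minimal sketch (the toy tokenizer and payload shape are assumptions):
```python
import litserve as ls

class StreamingEchoAPI(ls.LitAPI):
    def setup(self, device):
        # Toy stand-in for an LLM that emits tokens one at a time.
        self.generate = lambda prompt: iter(prompt.split())

    def predict(self, prompt):
        yield from self.generate(prompt)

    def encode_response(self, outputs):
        # `outputs` is the generator returned by predict(); re-yield each
        # token in the JSON shape the client expects.
        for token in outputs:
            yield {"token": token}

api = StreamingEchoAPI(stream=True)
```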

OpenAI-Compatible:
```python
from litserve.specs import OpenAISpec

class ChatAPI(ls.LitAPI):
    def setup(self, device):
        self.model = load_chat_model()

    def predict(self, messages):
        return self.model.chat(messages)

api = ChatAPI(spec=OpenAISpec())
```

Performance Tips:
- Use batching for GPU models to maximize utilization
- Enable streaming for operations taking >1 second
- Use async for I/O-bound operations (databases, external APIs); see the sketch after these tips
- Load models in setup(), not __init__
- Monitor GPU memory usage with larger batch sizes
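
A hedged sketch of the async tip above (the simulated I/O call is an assumption; any awaitable database or HTTP client would play the same role):
```python
import asyncio
import litserve as ls

class AsyncAPI(ls.LitAPI):
    def setup(self, device):
        self.model = lambda x: x * 2

    async def predict(self, x):
        # Simulate an I/O-bound step (database lookup, external API call)
        # without blocking the worker's event loop.
        await asyncio.sleep(0.1)
        return self.model(x)

api = AsyncAPI(enable_async=True)
```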

See Also:
- LitServer: Server class for hosting APIs
- LitSpec: API specifications for standard interfaces

"""

_stream: bool = False
_default_unbatch: Optional[Callable] = None
_spec: Optional[LitSpec] = None
@@ -47,60 +145,7 @@ def __init__(
mcp: Optional["MCP"] = None,
enable_async: bool = False,
):
"""Initialize a LitAPI instance that defines the model's inference behavior.

Args:
max_batch_size (int, optional):
Maximum requests to batch together for inference. Higher values improve throughput
for models that benefit from batching but use more memory. Defaults to 1.

batch_timeout (float, optional):
Maximum seconds to wait for a batch to fill before processing incomplete batches.
Lower values reduce latency, higher values improve batching efficiency. Defaults to 0.0.

api_path (str, optional):
URL endpoint path for predictions (e.g., "/predict", "/v1/chat"). Defaults to "/predict".

stream (bool, optional):
Enable streaming responses for real-time output (useful for LLMs, long-running tasks).
Requires implementing encode_response() for streaming. Defaults to False.

loop (Union[str, LitLoop], optional):
Inference loop strategy. "auto" selects optimal loop based on batching/streaming settings,
or provide custom LitLoop instance for advanced control. Defaults to "auto".

spec (LitSpec, optional):
API specification defining input/output schemas and behavior. Use OpenAISpec for
OpenAI-compatible APIs or custom LitSpec implementations. Defaults to None.

mcp (MCP, optional):
Enable MCP server for the API. Provide tool description and input schema. Defaults to None.

enable_async (bool, optional):
Enable async/await support for non-blocking operations in predict() method.
Useful for I/O-bound inference or external API calls. Defaults to False.

Example:
>>> # Simple API
>>> api = LitAPI()

>>> # Batched inference
>>> api = LitAPI(max_batch_size=8, batch_timeout=0.1)

>>> # OpenAI-compatible API
>>> api = LitAPI(spec=OpenAISpec())

>>> # Async processing
>>> api = LitAPI(enable_async=True)

>>> # MCP server
>>> api = LitAPI(mcp=MCP(description="A simple API", input_schema={"name": "string"}))

Note:
You must implement setup(), predict(), and optionally decode_request()/encode_response()
methods to define your model's behavior.

"""
"""Initialize LitAPI with configuration options."""

if max_batch_size <= 0:
raise ValueError("max_batch_size must be greater than 0")
84 changes: 77 additions & 7 deletions src/litserve/mcp.py
@@ -248,17 +248,87 @@ class ToolEndpointType(types.Tool):


class MCP:
"""MCP is a spec that can be used to create MCP tools for LitServe endpoints. It doesn't affect LitAPI.
"""Enable Model Context Protocol (MCP) integration for LitServe APIs.

Example:
>>> api = LitAPI(mcp=MCP(description="A simple API", input_schema={"name": "string"}))
This enables LitServe APIs to be seamlessly integrated into MCP-compatible AI systems,
making models accessible as tools within larger AI workflows and agent frameworks.

Quick Start:
```python
from pydantic import BaseModel
from litserve.mcp import MCP
import litserve as ls

Spec vs MCP:
- specs (like the OpenAI spec) affects the API endpoint, the request-response format, and the LitAPI methods.
- MCP, on the other hand, works differently. It doesn't follow the OpenAI spec. Instead, it only uses metadata like the name and description to generate an additional endpoint via MCPServer.
class PowerRequest(BaseModel):
input: float

""" # noqa: E501
class MyLitAPI(ls.test_examples.SimpleLitAPI):
def decode_request(self, request: PowerRequest) -> int:
return request.input

if __name__ == "__main__":
mcp=MCP(description="Returns the power of a number.")
api = MyLitAPI(mcp=mcp)
server = ls.LitServer(api)
server.run()
```

Args:
name:
Tool name for MCP registration. Defaults to None (uses api_path).

- Should be descriptive and unique within the MCP server
- Automatically converts "/" to "_" for compatibility
- Used by AI systems to identify and call the tool

description:
Human-readable description of what the tool does. Defaults to None (uses class docstring).

- Essential for AI systems to understand when to use the tool
- Should clearly explain the purpose and capabilities
- Used in tool selection and orchestration

input_schema:
JSON Schema defining expected input format. Defaults to None (auto-extracted).

- Describes the structure and types of input parameters
- Helps AI systems format requests correctly
- Auto-extracted from the decode_request method if not provided (see the explicit example below)
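
For illustration, a hedged sketch of passing these arguments explicitly (the tool name and schema values are assumptions, and `MyLitAPI` refers to the Quick Start class above):
```python
from litserve.mcp import MCP

mcp = MCP(
    name="square_number",  # how MCP clients will address the tool
    description="Returns the power of a number.",
    input_schema={
        "type": "object",
        "properties": {"input": {"type": "number"}},
        "required": ["input"],
    },
)
api = MyLitAPI(mcp=mcp)
```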

Schema Auto-Extraction:
If no input_schema is provided, MCP automatically extracts it from type hints in the decode_request method:

```python
import litserve as ls
from pydantic import BaseModel

class Request(BaseModel):
    input: str

class AutoSchemaAPI(ls.LitAPI):
    def decode_request(self, request: Request) -> str:
        # MCP analyzes the type hints to generate the schema:
        # input: str -> {"input": {"type": "string", "title": "Input"}}
        return request.input
```

Supported type annotations (sketched in the example after this list):
- Basic types: `str`, `int`, `float`, `bool`, `list`, `dict`
- Optional types: `Optional[str]`, `Union[str, None]`
- Pydantic models: Full schema extraction with validation
- Complex types: `List[str]`, `Dict[str, Any]`
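
A hedged sketch of richer type hints that the auto-extraction could read (the field names are illustrative assumptions):
```python
from typing import List, Optional
from pydantic import BaseModel
import litserve as ls

class RichRequest(BaseModel):
    prompt: str
    tags: List[str] = []
    temperature: Optional[float] = None

class RichSchemaAPI(ls.LitAPI):
    def decode_request(self, request: RichRequest) -> dict:
        # MCP reads the Pydantic model above to build the tool's input schema,
        # including the optional and list-typed fields.
        return request.model_dump()
```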

Notes:
- MCP integration is optional and doesn't affect non-MCP clients
- Tool names are automatically sanitized (/ becomes _)
- Original API endpoints remain unchanged and fully functional
- Compatible with all LitServe features (batching, streaming, etc.)

See Also:
- Model Context Protocol documentation: https://lightning.ai/docs/litserve/features/mcp
- LitAPI: Base class for API implementation
- LitServer: Server class for hosting APIs

"""

def __init__(
self,