Merged
12 changes: 12 additions & 0 deletions .buildkite/pipeline.yml
@@ -24,6 +24,18 @@ steps:
    agents:
      queue: "cpu_queue_premerge"

  - label: "Image Generation API Test"
    depends_on: image-build
    commands:
      - pytest -s -v tests/entrypoints/openai/
    agents:
      queue: "cpu_queue_premerge"
    plugins:
      - docker#v5.2.0:
          image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
          always-pull: true
          propagate-environment: true

  - label: "Diffusion Model Test"
    timeout_in_minutes: 20
    depends_on: image-build
3 changes: 3 additions & 0 deletions docs/.nav.yml
@@ -4,6 +4,9 @@ nav:
  - Getting Started:
      - getting_started/quickstart.md
      - getting_started/installation
  - Serving:
      - OpenAI-Compatible API:
          - Image Generation: serving/image_generation_api.md
  - Examples:
      - examples/README.md
      - Offline Inference:
9 changes: 9 additions & 0 deletions docs/mkdocs/hooks/generate_api_readme.py
@@ -136,6 +136,15 @@ def scan_package(package_name: str = "vllm_omni") -> dict[str, list[str]]:
        relative_path = py_file.relative_to(ROOT_DIR)
        module_path = str(relative_path.with_suffix("")).replace("/", ".").replace("\\", ".")

        # Skip excluded modules (avoid importing vllm during docs build)
        excluded_prefixes = [
            "vllm_omni.diffusion.models.qwen_image",
            "vllm_omni.entrypoints.async_diffusion",
            "vllm_omni.entrypoints.openai",
        ]
        if any(module_path.startswith(prefix) for prefix in excluded_prefixes):
            continue

        # Handle __init__.py - use parent module path
        if py_file.name == "__init__.py":
            # Remove .__init__ from module path
249 changes: 249 additions & 0 deletions docs/serving/image_generation_api.md
@@ -0,0 +1,249 @@
# Image Generation API

vLLM-Omni provides an OpenAI DALL-E compatible API for text-to-image generation using diffusion models.

Each server instance runs a single model (specified at startup via `vllm serve <model> --omni`).

## Quick Start

### Start the Server

For example, to serve one of the supported diffusion models:

```bash
# Qwen-Image
vllm serve Qwen/Qwen-Image --omni --port 8000

# Z-Image Turbo
vllm serve Tongyi-MAI/Z-Image-Turbo --omni --port 8000
```

### Generate Images

**Using curl:**

```bash
curl -X POST http://localhost:8000/v1/images/generations \
  -H "Content-Type: application/json" \
  -d '{
    "prompt": "a dragon laying over the spine of the Green Mountains of Vermont",
    "size": "1024x1024",
    "seed": 42
  }' | jq -r '.data[0].b64_json' | base64 -d > dragon.png
```

**Using Python:**

```python
import requests
import base64
from PIL import Image
import io

response = requests.post(
    "http://localhost:8000/v1/images/generations",
    json={
        "prompt": "a black and white cat wearing a princess tiara",
        "size": "1024x1024",
        "num_inference_steps": 50,
        "seed": 42,
    },
)

# Decode and save
img_data = response.json()["data"][0]["b64_json"]
img_bytes = base64.b64decode(img_data)
img = Image.open(io.BytesIO(img_bytes))
img.save("cat.png")
```

**Using OpenAI SDK:**

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="none")

response = client.images.generate(
    model="Qwen/Qwen-Image",
    prompt="a horse jumping over a fence nearby a babbling brook",
    n=1,
    size="1024x1024",
    response_format="b64_json",
)

# Note: Extension parameters (seed, steps, cfg) require direct HTTP requests
```

## API Reference

### Endpoint

```
POST /v1/images/generations
Content-Type: application/json
```

### Request Parameters

#### OpenAI Standard Parameters

| Parameter | Type | Default | Description |
|-----------|------|---------|-------------|
| `prompt` | string | **required** | Text description of the desired image |
| `model` | string | server's model | Model to use (optional, should match server if specified) |
| `n` | integer | 1 | Number of images to generate (1-10) |
| `size` | string | model defaults | Image dimensions in WxH format (e.g., "1024x1024", "512x512") |
| `response_format` | string | "b64_json" | Response format (only "b64_json" supported) |
| `user` | string | null | User identifier for tracking |

#### vllm-omni Extension Parameters

| Parameter | Type | Default | Description |
|-----------|------|---------|-------------|
| `negative_prompt` | string | null | Text describing what to avoid in the image |
| `num_inference_steps` | integer | model defaults | Number of diffusion steps |
| `guidance_scale` | float | model defaults | Classifier-free guidance scale (typically 0.0-20.0) |
| `true_cfg_scale` | float | model defaults | True CFG scale (model-specific parameter, may be ignored if not supported) |
| `seed` | integer | null | Random seed for reproducibility |
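
The `size` string is parsed into a width/height pair before it reaches the pipeline. A minimal sketch of that parsing, matching the error message shown under 400 Bad Request below (`parse_size` is an illustrative name, not vllm-omni's actual implementation):

```python
import re

def parse_size(size: str) -> tuple[int, int]:
    """Parse a 'WIDTHxHEIGHT' string such as '1024x1024' into (width, height)."""
    m = re.fullmatch(r"(\d+)x(\d+)", size)
    if m is None:
        raise ValueError(
            f"Invalid size format: {size!r}. Expected format: "
            "'WIDTHxHEIGHT' (e.g., '1024x1024')."
        )
    return int(m.group(1)), int(m.group(2))
```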

### Response Format

```json
{
  "created": 1701234567,
  "data": [
    {
      "b64_json": "<base64-encoded PNG>",
      "url": null,
      "revised_prompt": null
    }
  ]
}
```

## Examples

### Multiple Images

```bash
curl -X POST http://localhost:8000/v1/images/generations \
  -H "Content-Type: application/json" \
  -d '{
    "prompt": "a steampunk city set in a valley of the Adirondack mountains",
    "n": 4,
    "size": "1024x1024",
    "seed": 123
  }'
```

This generates 4 images in a single request.
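
The response has the same shape as the single-image case, with one entry per image under `data`. Given the parsed JSON body (e.g. `payload = response.json()` with `requests`), the images can be unpacked with the standard library alone (`save_images` is an illustrative helper):

```python
import base64

def save_images(payload: dict, prefix: str = "img") -> list[str]:
    """Write each b64_json entry in an images response to <prefix>_<i>.png."""
    paths = []
    for i, item in enumerate(payload["data"]):
        path = f"{prefix}_{i}.png"
        with open(path, "wb") as f:
            f.write(base64.b64decode(item["b64_json"]))
        paths.append(path)
    return paths
```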

### With Negative Prompt

```python
response = requests.post(
    "http://localhost:8000/v1/images/generations",
    json={
        "prompt": "a portrait of a skier in deep powder snow",
        "negative_prompt": "blurry, low quality, distorted, ugly",
        "num_inference_steps": 100,
        "size": "1024x1024",
    },
)
```

## Parameter Handling

The API passes parameters directly to the diffusion pipeline without model-specific transformation:

- **Default values**: When parameters are not specified, the underlying model uses its own defaults
- **Pass-through design**: User-provided values are forwarded directly to the diffusion engine
- **Minimal validation**: Only basic type checking and range validation at the API level

### Parameter Compatibility

Because the API does not validate parameters against the specific model:

- Unsupported parameters may be silently ignored by the model
- Incompatible values will result in errors from the underlying pipeline
- Recommended values vary by model; consult the model's documentation

**Best Practice:** Start with the model's recommended parameters, then adjust based on your needs.

## Error Responses

### 400 Bad Request

Invalid parameter values, such as a malformed `size` or a `model` that does not match the server's:

```json
{
  "detail": "Invalid size format: '1024x'. Expected format: 'WIDTHxHEIGHT' (e.g., '1024x1024')."
}
```

### 422 Unprocessable Entity

Validation errors (missing required fields):

```json
{
  "detail": [
    {
      "loc": ["body", "prompt"],
      "msg": "field required",
      "type": "value_error.missing"
    }
  ]
}
```

### 503 Service Unavailable

Diffusion engine not initialized:

```json
{
  "detail": "Diffusion engine not initialized. Start server with a diffusion model."
}
```
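
On the client side, all three cases can be handled with one check on the status code before decoding. A minimal pattern (`extract_image` is an illustrative name; pass it `response.status_code` and `response.json()` from `requests`):

```python
import base64

def extract_image(status_code: int, payload: dict) -> bytes:
    """Return decoded PNG bytes on success; raise with the server's detail otherwise."""
    if status_code != 200:
        raise RuntimeError(f"HTTP {status_code}: {payload.get('detail')}")
    return base64.b64decode(payload["data"][0]["b64_json"])
```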

## Troubleshooting

### Server Not Running

```bash
# Check if server is responding
curl http://localhost:8000/v1/images/generations \
  -H "Content-Type: application/json" \
  -d '{"prompt": "test"}'
```

### Out of Memory

If you encounter OOM errors:
1. Reduce image size: `"size": "512x512"`
2. Reduce inference steps: `"num_inference_steps": 25`
3. Generate fewer images: `"n": 1`

## Testing

Run the test suite to verify functionality:

```bash
# All image generation tests
pytest tests/entrypoints/openai/test_image_server.py -v

# Specific test
pytest tests/entrypoints/openai/test_image_server.py::test_generate_single_image -v
```

## Development

Enable debug logging to see prompts and generation details:

```bash
vllm serve Qwen/Qwen-Image --omni \
  --uvicorn-log-level debug
```
3 changes: 3 additions & 0 deletions mkdocs.yml
@@ -89,6 +89,9 @@ plugins:
      exclude:
        - "re:vllm_omni\\._.*" # Internal modules
        - "vllm_omni.diffusion.models.qwen_image" # avoid importing vllm in mkdocs building
        - "vllm_omni.entrypoints.async_diffusion" # avoid importing vllm in mkdocs building
        - "vllm_omni.entrypoints.openai" # avoid importing vllm in mkdocs building
        - "vllm_omni.entrypoints.openai.protocol" # avoid importing vllm in mkdocs building
- mkdocstrings:
handlers:
python: