
Commit cb136a2

Merge pull request vllm-project#48 from Gaohan123/refractor_v2
[Refractor] Dependency refractored to vLLM v0.11.0
2 parents: 98273d4 + d7305cc

38 files changed: +1423 −1736 lines

README.md

Lines changed: 31 additions & 14 deletions

@@ -38,11 +38,11 @@ vLLM-omni is built on a modular architecture that extends vLLM's core functional
 
 ## 🛠️ Installation
 
-### Installation of vLLM
+### Environment setup
 
 Use Docker to keep a consistent base environment (optional, recommended)
 ```bash
-docker run --gpus all --ipc=host --network=host -v $source_dir:$container_dir --rm --name $container_name -it nvcr.io/nvidia/pytorch:25.01-py3 bash
+docker run --gpus all --ipc=host --network=host -v $source_dir:$container_dir --name $container_name -it nvcr.io/nvidia/pytorch:25.01-py3 bash
 ```
 
 Set up basic uv environment
@@ -51,23 +51,45 @@ pip install uv
 uv venv --python 3.12 --seed
 source .venv/bin/activate
 ```
-Install certain version of vllm with commitid: 808a7b69df479b6b3a16181711cac7ca28a9b941
+
+### Installation of vLLM (for users)
+
+vLLM-omni is now built on vLLM v0.11.0. Install it with the command below.
+```bash
+uv pip install vllm==0.11.0 --torch-backend=auto
+```
+
+### Installation of vLLM (for developers)
+
+If you want to inspect or debug the vLLM source code, install the stable v0.11.0 release from source using its pre-built wheel.
 
 ```bash
 git clone https://github.com/vllm-project/vllm.git
 cd vllm
-git checkout 808a7b69df479b6b3a16181711cac7ca28a9b941
+git checkout v0.11.0
 ```
 Set up environment variables to fetch the pre-built wheel. If you hit network problems, download the whl file manually and set VLLM_PRECOMPILED_WHEEL_LOCATION to its local absolute path.
 ```bash
-export VLLM_COMMIT=808a7b69df479b6b3a16181711cac7ca28a9b941
-export VLLM_PRECOMPILED_WHEEL_LOCATION=https://wheels.vllm.ai/${VLLM_COMMIT}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl
+export VLLM_PRECOMPILED_WHEEL_LOCATION=https://github.com/vllm-project/vllm/releases/download/v0.11.0/vllm-0.11.0-cp38-abi3-manylinux1_x86_64.whl
 ```
-Install vllm with command below.
+Install vllm with the command below (if you have no existing PyTorch installation).
 ```bash
 uv pip install --editable .
 ```
 
+Install vllm with the command below (if you already have PyTorch installed).
+```bash
+python use_existing_torch.py
+uv pip install -r requirements/build.txt
+uv pip install --no-build-isolation --editable .
+```
+
+### Verification of successful vLLM installation
+Run the command below. If it raises no error, the installation was successful.
+```bash
+python -c "import vllm._C"
+```
+
 ### Installation of vLLM-omni
 Install additional requirements for vllm-omni
 ```bash
@@ -88,11 +110,6 @@ bash run.sh
 ```
 The output audio is saved in ./output_audio
 
-## To-do list
-- [x] Offline inference example for Qwen2.5-omni with single request
-- [ ] Adaptation from current vllm branch to stable vllm v0.11.0
-- [ ] Offline inference example for Qwen2.5-omni with streaming multiple requests
-- [ ] Online inference support
-- [ ] Support for other models
+## Further details
 
-For detailed model management, see [vllm_omni_design.md](docs/architecture/vllm_omni_design.md) and [high_level_arch_design.md](docs/architecture/high_level_arch_design.md).
+For detailed architecture design, see [vllm_omni_design.md](docs/architecture/vllm_omni_design.md) and [high_level_arch_design.md](docs/architecture/high_level_arch_design.md).
File renamed without changes.
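The verification step added in the README diff above amounts to importing vLLM's compiled extension module (`vllm._C` is the module named in the diff). As an illustrative sketch, the same check can be done programmatically without crashing on failure; the `extension_available` helper is hypothetical, not part of vLLM:

```python
import importlib.util


def extension_available(module_name: str) -> bool:
    """Return True if the named module can be located by the import system."""
    return importlib.util.find_spec(module_name) is not None


if __name__ == "__main__":
    # With vLLM installed, extension_available("vllm._C") performs the same
    # check as the README's one-liner; here we demonstrate with a stdlib module.
    print(extension_available("math"))  # → True
```

Using `find_spec` instead of a bare `import` lets a setup script report a missing build cleanly rather than raising `ImportError`.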

examples/offline_inference/qwen_2_5_omni/README.md

Lines changed: 29 additions & 14 deletions

@@ -2,11 +2,11 @@
 
 ## 🛠️ Installation
 
-### Installation of vLLM
+### Environment setup
 
 Use Docker to keep a consistent base environment (optional, recommended)
 ```bash
-docker run --gpus all --ipc=host --network=host -v $source_dir:$container_dir --rm --name $container_name -it nvcr.io/nvidia/pytorch:25.01-py3 bash
+docker run --gpus all --ipc=host --network=host -v $source_dir:$container_dir --name $container_name -it nvcr.io/nvidia/pytorch:25.01-py3 bash
 ```
 
 Set up basic uv environment
@@ -15,23 +15,45 @@ pip install uv
 uv venv --python 3.12 --seed
 source .venv/bin/activate
 ```
-Install certain version of vllm with commitid: 808a7b69df479b6b3a16181711cac7ca28a9b941
+
+### Installation of vLLM (for users)
+
+vLLM-omni is now built on vLLM v0.11.0. Install it with the command below.
+```bash
+uv pip install vllm==0.11.0 --torch-backend=auto
+```
+
+### Installation of vLLM (for developers)
+
+If you want to inspect or debug the vLLM source code, install the stable v0.11.0 release from source using its pre-built wheel.
 
 ```bash
 git clone https://github.com/vllm-project/vllm.git
 cd vllm
-git checkout 808a7b69df479b6b3a16181711cac7ca28a9b941
+git checkout v0.11.0
 ```
 Set up environment variables to fetch the pre-built wheel. If you hit network problems, download the whl file manually and set VLLM_PRECOMPILED_WHEEL_LOCATION to its local absolute path.
 ```bash
-export VLLM_COMMIT=808a7b69df479b6b3a16181711cac7ca28a9b941
-export VLLM_PRECOMPILED_WHEEL_LOCATION=https://wheels.vllm.ai/${VLLM_COMMIT}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl
+export VLLM_PRECOMPILED_WHEEL_LOCATION=https://github.com/vllm-project/vllm/releases/download/v0.11.0/vllm-0.11.0-cp38-abi3-manylinux1_x86_64.whl
 ```
-Install vllm with command below.
+Install vllm with the command below (if you have no existing PyTorch installation).
 ```bash
 uv pip install --editable .
 ```
 
+Install vllm with the command below (if you already have PyTorch installed).
+```bash
+python use_existing_torch.py
+uv pip install -r requirements/build.txt
+uv pip install --no-build-isolation --editable .
+```
+
+### Verification of successful vLLM installation
+Run the command below. If it raises no error, the installation was successful.
+```bash
+python -c "import vllm._C"
+```
+
 ### Installation of vLLM-omni
 Install additional requirements for vllm-omni
 ```bash
@@ -51,10 +73,3 @@ Modify PYTHONPATH in run.sh as your path of vllm_omni. Then run.
 bash run.sh
 ```
 The output audio is saved in ./output_audio
-
-## To-do list
-- [x] Offline inference example for Qwen2.5-omni with single request
-- [ ] Adaptation from current vllm branch to stable vllm v0.11.0
-- [ ] Offline inference example for Qwen2.5-omni with streaming multiple requests
-- [ ] Online inference support
-- [ ] Support for other models

examples/offline_inference/qwen_2_5_omni/end2end.py

Lines changed: 1 addition & 1 deletion

@@ -7,8 +7,8 @@
 import soundfile as sf
 import torch
 from utils import make_omni_prompt
-from vllm.sampling_params import SamplingParams
 
+from vllm.sampling_params import SamplingParams
 from vllm_omni.entrypoints.omni_llm import OmniLLM
 
 os.environ["VLLM_USE_V1"] = "1"

examples/offline_inference/qwen_2_5_omni/run.sh

Lines changed: 0 additions & 1 deletion

@@ -1,5 +1,4 @@
 export PYTHONPATH=/path/to/vllm-omni:$PYTHONPATH
-export HF_ENDPOINT=https://hf-mirror.com
 python end2end.py --model Qwen/Qwen2.5-Omni-7B \
     --prompts "Explain the system architecture for a scalable audio generation pipeline. Answer in 15 words." \
     --voice-type "m02" \

examples/offline_inference/qwen_2_5_omni/utils.py

Lines changed: 1 addition & 1 deletion

@@ -9,8 +9,8 @@
 import torch
 import torchvision.io
 from processing_omni import fetch_image, fetch_video
-from vllm.inputs import TextPrompt
 
+from vllm.inputs import TextPrompt
 from vllm_omni.inputs.data import OmniTokensPrompt
 
 
requirements.txt

Lines changed: 0 additions & 10 deletions

@@ -1,14 +1,4 @@
-cloudpickle==3.1.1
-msgspec==0.19.0
-numpy==2.3.4
 omegaconf==2.3.0
-pydantic==2.12.3
-transformers==4.57.0
-typing_extensions==4.15.0
 decord==0.6.0
 librosa==0.11.0
-packaging==25.0
-Pillow==12.0.0
-Requests==2.32.5
 resampy==0.4.3
-soundfile==0.13.1
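This commit trims requirements.txt down to the packages that are not already pulled in transitively by vllm==0.11.0. A small sketch for sanity-checking the surviving pins; the `check_pins` helper is illustrative, not part of the repo, and the pins are copied from the diff:

```python
from importlib.metadata import PackageNotFoundError, version


def check_pins(pinned: dict[str, str]) -> dict[str, str]:
    """Report whether each pinned distribution is installed at the pinned version."""
    report = {}
    for name, want in pinned.items():
        try:
            have = version(name)
            report[name] = "ok" if have == want else f"mismatch ({have} installed)"
        except PackageNotFoundError:
            report[name] = "missing"
    return report


if __name__ == "__main__":
    # The pins that survive this commit, copied from the diff above.
    print(check_pins({
        "omegaconf": "2.3.0",
        "decord": "0.6.0",
        "librosa": "0.11.0",
        "resampy": "0.4.3",
    }))
```

Running it inside the project venv flags any pin that uv resolved to a different version than the file declares.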

vllm_omni/config/__init__.py

Lines changed: 1 addition & 27 deletions

@@ -2,33 +2,7 @@
 Configuration module for vLLM-omni.
 """
 
-from typing import Optional
-
-from pydantic import ConfigDict
-from pydantic.dataclasses import dataclass
-from vllm.config import ModelConfig, config
-
-import vllm_omni.model_executor.models as me_models
-
-
-@config
-@dataclass(config=ConfigDict(arbitrary_types_allowed=True))
-class OmniModelConfig(ModelConfig):
-    """Configuration for Omni models, extending the base ModelConfig."""
-
-    stage_id: int = 0
-    model_stage: str = "thinker"
-    model_arch: str = "Qwen2_5OmniForConditionalGeneration"
-    engine_output_type: Optional[str] = None
-
-    @property
-    def registry(self):
-        return me_models.OmniModelRegistry
-
-    @property
-    def architectures(self) -> list[str]:
-        return [self.model_arch]
-
+from vllm_omni.config.model import OmniModelConfig
 
 __all__ = [
     "OmniModelConfig",

vllm_omni/config/model.py

Lines changed: 26 additions & 0 deletions

@@ -0,0 +1,26 @@
+from typing import Optional
+
+from pydantic import ConfigDict
+from pydantic.dataclasses import dataclass
+
+import vllm_omni.model_executor.models as me_models
+from vllm.config import ModelConfig, config
+
+
+@config
+@dataclass(config=ConfigDict(arbitrary_types_allowed=True))
+class OmniModelConfig(ModelConfig):
+    """Configuration for Omni models, extending the base ModelConfig."""
+
+    stage_id: int = 0
+    model_stage: str = "thinker"
+    model_arch: str = "Qwen2_5OmniForConditionalGeneration"
+    engine_output_type: Optional[str] = None
+
+    @property
+    def registry(self):
+        return me_models.OmniModelRegistry
+
+    @property
+    def architectures(self) -> list[str]:
+        return [self.model_arch]
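The `OmniModelConfig` moved into this file overrides `architectures` so the engine resolves model classes from `model_arch` for each pipeline stage. A stdlib-only sketch of that override pattern; the base class here is a stand-in for vLLM's real `ModelConfig`, and the pydantic/`@config` machinery is omitted:

```python
from dataclasses import dataclass


@dataclass
class ModelConfigStub:
    """Stand-in for vllm.config.ModelConfig (illustrative only)."""

    model: str = ""

    @property
    def architectures(self) -> list[str]:
        return []


@dataclass
class OmniModelConfigSketch(ModelConfigStub):
    """Mirrors the stage fields OmniModelConfig declares in this file."""

    stage_id: int = 0
    model_stage: str = "thinker"
    model_arch: str = "Qwen2_5OmniForConditionalGeneration"

    @property
    def architectures(self) -> list[str]:
        # The engine asks the config which architectures to load;
        # overriding this property routes lookup to the configured arch.
        return [self.model_arch]


cfg = OmniModelConfigSketch(model="Qwen/Qwen2.5-Omni-7B", model_stage="talker")
print(cfg.architectures)  # → ['Qwen2_5OmniForConditionalGeneration']
```

In the real class, the `@config` decorator and pydantic dataclass add validation and CLI wiring on top of this same property-override mechanism.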
