vllm-project · Gaohan123 · Feb 11, 2026 · Feb 11, 2026 · Feb 11, 2026
@@ -228,25 +228,25 @@ steps:
         volumes:
         - "/fsx/hf_cache:/fsx/hf_cache"
 
-  # - label: "Qwen3-TTS E2E Test"
-  #   timeout_in_minutes: 10
-  #   depends_on: image-build
-  #   commands:
-  #     - export VLLM_LOGGING_LEVEL=DEBUG
-  #     - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  #     - pytest -s -v tests/e2e/online_serving/test_qwen3_tts.py
-  #   agents:
-  #     queue: "gpu_4_queue"
-  #   plugins:
-  #     - docker#v5.2.0:
-  #         image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
-  #         always-pull: true
-  #         propagate-environment: true
-  #         shm-size: "8gb"
-  #         environment:
-  #           - "HF_HOME=/fsx/hf_cache"
-  #         volumes:
-  #           - "/fsx/hf_cache:/fsx/hf_cache"
+  - label: "Qwen3-TTS E2E Test"
+    timeout_in_minutes: 10
+    depends_on: image-build
+    commands:
+      - export VLLM_LOGGING_LEVEL=DEBUG
+      - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+      - pytest -s -v tests/e2e/online_serving/test_qwen3_tts.py
+    agents:
+      queue: "gpu_4_queue"
+    plugins:
+      - docker#v5.2.0:
+          image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
+          always-pull: true
+          propagate-environment: true
+          shm-size: "8gb"
+          environment:
+            - "HF_HOME=/fsx/hf_cache"
+          volumes:
+            - "/fsx/hf_cache:/fsx/hf_cache"
 
   # - label: "Omni Model Test with H100"
   #   timeout_in_minutes: 30

@@ -82,16 +82,19 @@ def omni_snapshot_download(model_id) -> str:
     # For other cases (Hugging Face), perform a real download to ensure all
     # necessary files (including *.pt for audio/diffusion) are available locally
     # before stage workers are spawned. This prevents initialization timeouts.
+    # Return the original model_id (not the resolved local cache path) so
+    # that model_config.model keeps the Hugging Face repo ID
+    # (e.g. "Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice").
     try:
-        return download_weights_from_hf_specific(
+        download_weights_from_hf_specific(
             model_name_or_path=model_id,
             cache_dir=None,
             allow_patterns=["*"],
             require_all=True,
         )
     except huggingface_hub.errors.RepositoryNotFoundError:
         logger.warning(f"Repository not found for '{model_id}'.")
-        return model_id
+    return model_id
 
 
 class OmniBase:

@@ -81,7 +81,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
             torch_dtype=torch.bfloat16,
             **attn_kwargs,
         )
-        self.task_type = model_path.split("-")[-1].strip("/")
+        self.task_type = model_path.split("-")[-1].split("/")[0]
         # Mark that this model produces multimodal outputs
         self.have_multimodal_outputs = True