diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 9865a4a4be..9e7fc07a9f 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -228,25 +228,25 @@ steps: volumes: - "/fsx/hf_cache:/fsx/hf_cache" - # - label: "Qwen3-TTS E2E Test" - # timeout_in_minutes: 10 - # depends_on: image-build - # commands: - # - export VLLM_LOGGING_LEVEL=DEBUG - # - export VLLM_WORKER_MULTIPROC_METHOD=spawn - # - pytest -s -v tests/e2e/online_serving/test_qwen3_tts.py - # agents: - # queue: "gpu_4_queue" - # plugins: - # - docker#v5.2.0: - # image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT - # always-pull: true - # propagate-environment: true - # shm-size: "8gb" - # environment: - # - "HF_HOME=/fsx/hf_cache" - # volumes: - # - "/fsx/hf_cache:/fsx/hf_cache" + - label: "Qwen3-TTS E2E Test" + timeout_in_minutes: 10 + depends_on: image-build + commands: + - export VLLM_LOGGING_LEVEL=DEBUG + - export VLLM_WORKER_MULTIPROC_METHOD=spawn + - pytest -s -v tests/e2e/online_serving/test_qwen3_tts.py + agents: + queue: "gpu_4_queue" + plugins: + - docker#v5.2.0: + image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT + always-pull: true + propagate-environment: true + shm-size: "8gb" + environment: + - "HF_HOME=/fsx/hf_cache" + volumes: + - "/fsx/hf_cache:/fsx/hf_cache" # - label: "Omni Model Test with H100" # timeout_in_minutes: 30 diff --git a/vllm_omni/entrypoints/omni.py b/vllm_omni/entrypoints/omni.py index 510813c6ab..711185675f 100644 --- a/vllm_omni/entrypoints/omni.py +++ b/vllm_omni/entrypoints/omni.py @@ -82,8 +82,11 @@ def omni_snapshot_download(model_id) -> str: # For other cases (Hugging Face), perform a real download to ensure all # necessary files (including *.pt for audio/diffusion) are available locally # before stage workers are spawned. This prevents initialization timeouts. + # Return the original model_id so that model_config.model preserves + # HuggingFace semantics (e.g.
"Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice") + # instead of the resolved cache path. try: - return download_weights_from_hf_specific( + download_weights_from_hf_specific( model_name_or_path=model_id, cache_dir=None, allow_patterns=["*"], @@ -91,7 +94,7 @@ def omni_snapshot_download(model_id) -> str: ) except huggingface_hub.errors.RepositoryNotFoundError: logger.warning(f"Repository not found for '{model_id}'.") - return model_id + return model_id class OmniBase: diff --git a/vllm_omni/model_executor/models/qwen3_tts/qwen3_tts.py b/vllm_omni/model_executor/models/qwen3_tts/qwen3_tts.py index 8514a725d4..664d2a2957 100644 --- a/vllm_omni/model_executor/models/qwen3_tts/qwen3_tts.py +++ b/vllm_omni/model_executor/models/qwen3_tts/qwen3_tts.py @@ -81,7 +81,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): torch_dtype=torch.bfloat16, **attn_kwargs, ) - self.task_type = model_path.split("-")[-1].strip("/") + self.task_type = model_path.split("-")[-1].split("/")[0] # Mark that this model produces multimodal outputs self.have_multimodal_outputs = True