Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 19 additions & 19 deletions .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -228,25 +228,25 @@ steps:
volumes:
- "/fsx/hf_cache:/fsx/hf_cache"

# - label: "Qwen3-TTS E2E Test"
# timeout_in_minutes: 10
# depends_on: image-build
# commands:
# - export VLLM_LOGGING_LEVEL=DEBUG
# - export VLLM_WORKER_MULTIPROC_METHOD=spawn
# - pytest -s -v tests/e2e/online_serving/test_qwen3_tts.py
# agents:
# queue: "gpu_4_queue"
# plugins:
# - docker#v5.2.0:
# image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
# always-pull: true
# propagate-environment: true
# shm-size: "8gb"
# environment:
# - "HF_HOME=/fsx/hf_cache"
# volumes:
# - "/fsx/hf_cache:/fsx/hf_cache"
- label: "Qwen3-TTS E2E Test"
timeout_in_minutes: 10
depends_on: image-build
commands:
- export VLLM_LOGGING_LEVEL=DEBUG
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -s -v tests/e2e/online_serving/test_qwen3_tts.py
agents:
queue: "gpu_4_queue"
plugins:
- docker#v5.2.0:
image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
always-pull: true
propagate-environment: true
shm-size: "8gb"
environment:
- "HF_HOME=/fsx/hf_cache"
volumes:
- "/fsx/hf_cache:/fsx/hf_cache"

# - label: "Omni Model Test with H100"
# timeout_in_minutes: 30
Expand Down
7 changes: 5 additions & 2 deletions vllm_omni/entrypoints/omni.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,16 +82,19 @@ def omni_snapshot_download(model_id) -> str:
# For other cases (Hugging Face), perform a real download to ensure all
# necessary files (including *.pt for audio/diffusion) are available locally
# before stage workers are spawned. This prevents initialization timeouts.
# Return the original model_id (not the download call's return value) so
# that model_config.model preserves Hugging Face semantics
# (e.g. "Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice") instead of the resolved cache path.
try:
return download_weights_from_hf_specific(
download_weights_from_hf_specific(
model_name_or_path=model_id,
cache_dir=None,
allow_patterns=["*"],
require_all=True,
)
except huggingface_hub.errors.RepositoryNotFoundError:
logger.warning(f"Repository not found for '{model_id}'.")
return model_id
return model_id


class OmniBase:
Expand Down
2 changes: 1 addition & 1 deletion vllm_omni/model_executor/models/qwen3_tts/qwen3_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
torch_dtype=torch.bfloat16,
**attn_kwargs,
)
self.task_type = model_path.split("-")[-1].strip("/")
self.task_type = model_path.split("-")[-1].split("/")[0]
# Mark that this model produces multimodal outputs
self.have_multimodal_outputs = True

Expand Down