vllm-project
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 0 deletions b/‎.gitignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎examples/offline_inference/qwen2_5_omni/end2end.py‎
Lines changed: 13 additions & 0 deletions b/‎examples/offline_inference/qwen2_5_omni/end2end.py‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎examples/offline_inference/qwen2_5_omni/run_single_prompt.sh‎
Lines changed: 13 additions & 2 deletions b/‎examples/offline_inference/qwen2_5_omni/run_single_prompt.sh‎
Lines changed: 13 additions & 2 deletions
diff --git a/‎examples/offline_inference/tmp_test/README.md‎
Lines changed: 34 additions & 0 deletions b/‎examples/offline_inference/tmp_test/README.md‎
Lines changed: 34 additions & 0 deletions
diff --git a/‎examples/offline_inference/tmp_test/end2end.py‎
Lines changed: 264 additions & 0 deletions b/‎examples/offline_inference/tmp_test/end2end.py‎
Lines changed: 264 additions & 0 deletions
diff --git a/‎examples/offline_inference/tmp_test/extract_prompts.py‎
Lines changed: 45 additions & 0 deletions b/‎examples/offline_inference/tmp_test/extract_prompts.py‎
Lines changed: 45 additions & 0 deletions
@@ -235,3 +235,4 @@ configs/development.yaml
 # Docker
 .dockerignore
 Dockerfile.dev
+discussion
@@ -283,6 +283,19 @@ def parse_args():
         default=None,
         help="Path to a .txt file with one prompt per line (preferred).",
     )
+    parser.add_argument(
+        "--worker-backend",
+        type=str,
+        default="process",
+        choices=["process","ray"],
+        help="backend"
+    )
+    parser.add_argument(
+        "--ray-address",
+        type=str,
+        default=None,
+        help="Path to a .txt file with one prompt per line (preferred).",
+
 
     return parser.parse_args()
 
 
@@ -1,2 +1,13 @@
-python end2end.py --output-wav output_audio \
-                  --query-type use_audio_in_video
+#python end2end.py --output-wav output_audio \
+#                  --query-type use_audio_in_video
+export PYTHONPATH=/workspace/omni/vllm-omni/:$PYTHONPATH
+CUDA_VISIBLE_DEVICES=0,1,2,3 python end2end.py --model /workspace/Qwen2.5-Omni-7B/ \
+                                 --voice-type "m02" \
+                                 --dit-ckpt none \
+                                 --bigvgan-ckpt none \
+                                 --output-wav output_audio \
+                                 --prompt_type text \
+                                 --init-sleep-seconds 0 \
+                                 --worker-backend ray \
+                                 --ray-address auto \
+                                 --prompts "Explain the system architecture for a scalable audio generation pipeline. Answer in 15 words."
@@ -0,0 +1,34 @@
+# Offline Example of vLLM-omni for Qwen2.5-omni
+
+## 🛠️ Installation
+
+Please refer to [README.md](../../../README.md)
+
+## Run examples (Qwen2.5-omni)
+### Multiple Prompts
+Download dataset from [seed_tts](https://drive.google.com/file/d/1GlSjVfSHkW3-leKKBlfrjuuTGqQ_xaLP/edit). To get the prompt, you can:
+```bash
+tar -xf <Your Download Path>/seedtts_testset.tar
+cp seedtts_testset/en/meta.lst examples/offline_inference/qwen2_5_omni/meta.lst
+python3 examples/offline_inference/qwen2_5_omni/extract_prompts.py \
+  --input examples/offline_inference/qwen2_5_omni/meta.lst \
+  --output examples/offline_inference/qwen2_5_omni/top100.txt \
+  --topk 100
+```
+Get into the example folder
+```bash
+cd examples/offline_inference/qwen2_5_omni
+```
+Then run the command below.
+```bash
+bash run_multiple_prompts.sh
+```
+### Single Prompts
+Get into the example folder
+```bash
+cd examples/offline_inference/qwen2_5_omni
+```
+Then run the command below.
+```bash
+bash run_single_prompt.sh
+```
@@ -0,0 +1,264 @@
+import argparse
+import os
+import os as _os_env_toggle
+import random
+
+import numpy as np
+import soundfile as sf
+import torch
+from utils import make_omni_prompt
+from vllm.sampling_params import SamplingParams
+
+from vllm_omni.entrypoints.omni_llm import OmniLLM
+
+_os_env_toggle.environ["VLLM_USE_V1"] = "1"
+
+SEED = 42
+# Set all random seeds
+random.seed(SEED)
+np.random.seed(SEED)
+torch.manual_seed(SEED)
+torch.cuda.manual_seed(SEED)
+torch.cuda.manual_seed_all(SEED)
+
+# Make PyTorch deterministic
+torch.backends.cudnn.deterministic = True
+torch.backends.cudnn.benchmark = False
+
+# Set environment variables for deterministic behavior
+os.environ["PYTHONHASHSEED"] = str(SEED)
+os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--model",
+        required=True,
+        help="Path to merged model directory (will be created if downloading).",
+    )
+    parser.add_argument("--thinker-model", type=str, default=None)
+    parser.add_argument("--talker-model", type=str, default=None)
+    parser.add_argument("--code2wav-model", type=str, default=None)
+    parser.add_argument(
+        "--hf-hub-id",
+        default="Qwen/Qwen2.5-Omni-7B",
+        help="Hugging Face repo id to download if needed.",
+    )
+    parser.add_argument("--hf-revision", default=None, help="Optional HF revision (branch/tag/commit).")
+    parser.add_argument("--prompts", nargs="+", default=None, help="Input text prompts.")
+    parser.add_argument("--voice-type", default="default", help="Voice type, e.g., m02, f030, default.")
+    parser.add_argument(
+        "--code2wav-dir",
+        default=None,
+        help="Path to code2wav folder (contains spk_dict.pt).",
+    )
+    parser.add_argument("--dit-ckpt", default=None, help="Path to DiT checkpoint file (e.g., dit.pt).")
+    parser.add_argument("--bigvgan-ckpt", default=None, help="Path to BigVGAN checkpoint file.")
+    parser.add_argument("--dtype", default="bfloat16", choices=["float16", "bfloat16", "float32"])
+    parser.add_argument("--max-model-len", type=int, default=32768)
+    parser.add_argument(
+        "--init-sleep-seconds",
+        type=int,
+        default=20,
+        help="Sleep seconds after starting each stage process to allow initialization (default: 20)",
+    )
+
+    parser.add_argument("--thinker-only", action="store_true")
+    parser.add_argument("--text-only", action="store_true")
+    parser.add_argument("--do-wave", action="store_true")
+    parser.add_argument(
+        "--prompt_type",
+        choices=[
+            "text",
+            "audio",
+            "audio-long",
+            "audio-long-chunks",
+            "audio-long-expand-chunks",
+            "image",
+            "video",
+            "video-frames",
+            "audio-in-video",
+            "audio-in-video-v2",
+            "audio-multi-round",
+            "badcase-vl",
+            "badcase-text",
+            "badcase-image-early-stop",
+            "badcase-two-audios",
+            "badcase-two-videos",
+            "badcase-multi-round",
+            "badcase-voice-type",
+            "badcase-voice-type-v2",
+            "badcase-audio-tower-1",
+            "badcase-audio-only",
+        ],
+        default="text",
+    )
+    parser.add_argument("--use-torchvision", action="store_true")
+    parser.add_argument("--tokenize", action="store_true")
+    parser.add_argument(
+        "--output-wav",
+        default="output.wav",
+        help="[Deprecated] Output wav directory (use --output-dir).",
+    )
+    parser.add_argument(
+        "--output-dir",
+        default="outputs",
+        help="Output directory to save text and wav files together.",
+    )
+    parser.add_argument(
+        "--thinker-hidden-states-dir",
+        default="thinker_hidden_states",
+        help="Path to thinker hidden states directory.",
+    )
+    parser.add_argument(
+        "--batch-timeout",
+        type=int,
+        default=5,
+        help="Timeout for batching in seconds (default: 5)",
+    )
+    parser.add_argument(
+        "--init-timeout",
+        type=int,
+        default=300,
+        help="Timeout for initializing stages in seconds (default: 300)",
+    )
+    parser.add_argument(
+        "--shm-threshold-bytes",
+        type=int,
+        default=65536,
+        help="Threshold for using shared memory in bytes (default: 65536)",
+    )
+    parser.add_argument(
+        "--enable-stats",
+        action="store_true",
+        default=False,
+        help="Enable writing detailed statistics (default: disabled)",
+    )
+    parser.add_argument(
+        "--txt-prompts",
+        type=str,
+        default=None,
+        help="Path to a .txt file with one prompt per line (preferred).",
+    )
+    parser.add_argument(
+        "--worker-backend",
+        type=str,
+        default="process",
+        choices=["process","ray"],
+        help="backend"
+    )
+    parser.add_argument(
+        "--ray-address",
+        type=str,
+        default=None,
+        help="Path to a .txt file with one prompt per line (preferred)."
+        )
+
+    args = parser.parse_args()
+    return args
+
+
+def main():
+    args = parse_args()
+    model_name = args.model
+    try:
+        # Preferred: load from txt file (one prompt per line)
+        if getattr(args, "txt_prompts", None) and args.prompt_type == "text":
+            with open(args.txt_prompts, encoding="utf-8") as f:
+                lines = [ln.strip() for ln in f.readlines()]
+            args.prompts = [ln for ln in lines if ln != ""]
+            print(f"[Info] Loaded {len(args.prompts)} prompts from {args.txt_prompts}")
+    except Exception as e:
+        print(f"[Error] Failed to load prompts: {e}")
+        raise
+
+    if args.prompts is None:
+        raise ValueError("No prompts provided. Use --prompts ... or --txt-prompts <file.txt> (with --prompt_type text)")
+    omni_llm = OmniLLM(
+        model=model_name,
+        log_stats=args.enable_stats,
+        log_file=("omni_llm_pipeline.log" if args.enable_stats else None),
+        init_sleep_seconds=args.init_sleep_seconds,
+        batch_timeout=args.batch_timeout,
+        init_timeout=args.init_timeout,
+        shm_threshold_bytes=args.shm_threshold_bytes,
+        worker_backend=args.worker_backend,
+        ray_address=args.ray_address
+    )
+    thinker_sampling_params = SamplingParams(
+        temperature=0.0,  # Deterministic - no randomness
+        top_p=1.0,  # Disable nucleus sampling
+        top_k=-1,  # Disable top-k sampling
+        max_tokens=2048,
+        seed=SEED,  # Fixed seed for sampling
+        detokenize=True,
+        repetition_penalty=1.1,
+    )
+    talker_sampling_params = SamplingParams(
+        temperature=0.9,
+        top_p=0.8,
+        top_k=40,
+        max_tokens=2048,
+        seed=SEED,  # Fixed seed for sampling
+        detokenize=True,
+        repetition_penalty=1.05,
+        stop_token_ids=[8294],
+    )
+    code2wav_sampling_params = SamplingParams(
+        temperature=0.0,  # Deterministic - no randomness
+        top_p=1.0,  # Disable nucleus sampling
+        top_k=-1,  # Disable top-k sampling
+        max_tokens=2048,
+        seed=SEED,  # Fixed seed for sampling
+        detokenize=True,
+        repetition_penalty=1.1,
+    )
+
+    sampling_params_list = [
+        thinker_sampling_params,
+        talker_sampling_params,
+        code2wav_sampling_params,
+    ]
+    import time
+    for i in range(1):
+        t1 = time.time()
+        prompt = [make_omni_prompt(args, prompt) for prompt in args.prompts]
+        print(f"prompt:{prompt}")
+        omni_outputs = omni_llm.generate(prompt, sampling_params_list)
+        t2 = time.time()
+        print(f"==========> time:{t2-t1}")
+
+    # Determine output directory: prefer --output-dir; fallback to --output-wav
+    output_dir = args.output_dir if getattr(args, "output_dir", None) else args.output_wav
+    os.makedirs(output_dir, exist_ok=True)
+    for stage_outputs in omni_outputs:
+        if stage_outputs.final_output_type == "text":
+            for output in stage_outputs.request_output:
+                request_id = int(output.request_id)
+                text_output = output.outputs[0].text
+                # Save aligned text file per request
+                prompt_text = args.prompts[request_id]
+                out_txt = os.path.join(output_dir, f"{request_id:05d}.txt")
+                lines = []
+                lines.append("Prompt:\n")
+                lines.append(str(prompt_text) + "\n")
+                lines.append("vllm_text_output:\n")
+                lines.append(str(text_output).strip() + "\n")
+                try:
+                    with open(out_txt, "w", encoding="utf-8") as f:
+                        f.writelines(lines)
+                except Exception as e:
+                    print(f"[Warn] Failed writing text file {out_txt}: {e}")
+                print(f"Request ID: {request_id}, Text saved to {out_txt}")
+        elif stage_outputs.final_output_type == "audio":
+            for output in stage_outputs.request_output:
+                request_id = int(output.request_id)
+                audio_tensor = output.multimodal_output["audio"]
+                output_wav = os.path.join(output_dir, f"output_{output.request_id}.wav")
+                sf.write(output_wav, audio_tensor.detach().cpu().numpy(), samplerate=24000)
+                print(f"Request ID: {request_id}, Saved audio to {output_wav}")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,45 @@
+#!/usr/bin/env python3
+import argparse
+from typing import Optional
+
+
+def extract_prompt(line: str) -> Optional[str]:
+    # 提取第一个 '|' 与第二个 '|' 之间的内容
+    i = line.find("|")
+    if i == -1:
+        return None
+    j = line.find("|", i + 1)
+    if j == -1:
+        return None
+    return line[i + 1 : j].strip()
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--input", "-i", required=True, help="Input .lst file path")
+    parser.add_argument("--output", "-o", required=True, help="Output file path")
+    parser.add_argument(
+        "--topk",
+        "-k",
+        type=int,
+        default=100,
+        help="Extract the top K prompts (default: 100)",
+    )
+    args = parser.parse_args()
+
+    prompts = []
+    with open(args.input, encoding="utf-8", errors="ignore") as f:
+        for line in f:
+            if len(prompts) >= args.topk:
+                break
+            p = extract_prompt(line.rstrip("\n"))
+            if p:
+                prompts.append(p)
+
+    with open(args.output, "w", encoding="utf-8") as f:
+        for p in prompts:
+            f.write(p + "\n")
+
+
+if __name__ == "__main__":
+    main()