2 changes: 1 addition & 1 deletion vllm_omni/entrypoints/omni_stage.py
@@ -452,9 +452,9 @@ def filter(self, record: _logging.LogRecord) -> bool:
     max_batch_size = int(runtime_cfg.get("max_batch_size", 1) or 1)
     print(f"[Stage-{stage_id}] Max batch size: {max_batch_size}")
     batch_tasks: list[dict[str, Any]] = [task]
+    start_time = _time.time()
     if max_batch_size > 1:
         while len(batch_tasks) < max_batch_size:
-            start_time = _time.time()
             if not in_q.empty():
                 extra = in_q.get_nowait()
                 if extra is None:
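The fix in this hunk is subtle enough to spell out: the old code re-initialized start_time on every pass through the batching loop, so any elapsed-time cutoff measured against it would see an elapsed time of roughly zero and never expire, letting an under-filled batch spin until max_batch_size was reached. Hoisting the assignment above the loop anchors the deadline to the moment batching began. Below is a minimal sketch of the pattern, assuming a timeout check of the kind this loop presumably has past the point where the hunk is truncated; batch_timeout_s, the elif branch, and treating None as a shutdown sentinel are all assumptions, not the PR's code.

import queue
import time as _time

def collect_batch(task, in_q, max_batch_size=4, batch_timeout_s=0.01):
    batch_tasks = [task]
    start_time = _time.time()  # fixed: deadline anchored once, before the loop
    if max_batch_size > 1:
        while len(batch_tasks) < max_batch_size:
            # Old bug: start_time = _time.time() sat here, resetting the
            # deadline every iteration so the timeout below never fired.
            if not in_q.empty():
                extra = in_q.get_nowait()
                if extra is None:  # assumed shutdown sentinel
                    break
                batch_tasks.append(extra)
            elif _time.time() - start_time > batch_timeout_s:
                break  # give up waiting and run the partial batch
    return batch_tasks

q = queue.Queue()
q.put({"prompt": "b"})
print(collect_batch({"prompt": "a"}, q, max_batch_size=2))  # both tasks batched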
59 changes: 59 additions & 0 deletions vllm_omni/worker/gpu_model_runner.py
@@ -6,6 +6,8 @@
 from vllm.distributed.parallel_state import get_pp_group
 from vllm.forward_context import BatchDescriptor, set_forward_context
 from vllm.logger import init_logger
+from vllm.model_executor.layers.rotary_embedding import MRotaryEmbedding
+from vllm.model_executor.models.interfaces import supports_mrope
 from vllm.model_executor.models.interfaces_base import VllmModelForPooling
 from vllm.sampling_params import SamplingType
 from vllm.utils import LazyLoader, cdiv
@@ -33,6 +35,63 @@


 class OmniGPUModelRunner(GPUModelRunner):
+    def _init_mrope_positions(self, req_state: CachedRequestState):
+        """Initialize M-RoPE positions for multimodal inputs.
+
+        Extracts multimodal feature metadata (image grids, video grids,
+        audio features) and computes M-RoPE positions for proper
+        positional encoding of multimodal tokens. Falls back to the
+        generic ``MRotaryEmbedding.get_input_positions_tensor`` helper
+        when the model does not implement the M-RoPE interface
+        (``supports_mrope``).
+
+        Args:
+            req_state: Cached request state containing multimodal features
+        """
+        image_grid_thw = []
+        video_grid_thw = []
+        second_per_grid_ts = []
+        audio_feature_lengths = []
+        use_audio_in_video = False
+        for mm_feature in req_state.mm_features:
+            mm_item = mm_feature.data
+            if mm_item is None:
+                continue
+            mm_input = mm_item.get_data()
+            if (t := mm_input.get("image_grid_thw")) is not None:
+                image_grid_thw.append(t.tolist())
+            if (t := mm_input.get("video_grid_thw")) is not None:
+                video_grid_thw.append(t.tolist())
+            if (t := mm_input.get("second_per_grid_ts")) is not None:
+                second_per_grid_ts.append(t)
+            if (t := mm_input.get("audio_feature_lengths")) is not None:
+                audio_feature_lengths.append(t)
+            # Check for use_audio_in_video
+            use_audio_in_video_value = mm_input.get("use_audio_in_video")
+            if use_audio_in_video_value is not None:
+                use_audio_in_video = bool(use_audio_in_video_value.item())
+
+        if supports_mrope(self.model):
+            req_state.mrope_positions, req_state.mrope_position_delta = self.model.get_mrope_input_positions(
+                req_state.prompt_token_ids,
+                hf_config=self.model_config.hf_config,
+                image_grid_thw=image_grid_thw,
+                video_grid_thw=video_grid_thw,
+                second_per_grid_ts=second_per_grid_ts,
+                audio_feature_lengths=audio_feature_lengths,
+                use_audio_in_video=use_audio_in_video,
+            )
+        else:
+            req_state.mrope_positions, req_state.mrope_position_delta = MRotaryEmbedding.get_input_positions_tensor(
+                req_state.prompt_token_ids,
+                hf_config=self.model_config.hf_config,
+                image_grid_thw=image_grid_thw,
+                video_grid_thw=video_grid_thw,
+                second_per_grid_ts=second_per_grid_ts,
+                audio_feature_lengths=audio_feature_lengths,
+                use_audio_in_video=use_audio_in_video,
+            )
+
     def _update_states(self, scheduler_output: "SchedulerOutput") -> None:
         """Update the cached states and the persistent batch with the scheduler
         output.
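For readers unfamiliar with M-RoPE plumbing, the extraction loop in the new method is the fiddly part: each multimodal feature contributes per-modality metadata ((t, h, w) patch-grid shapes for images and videos, feature lengths for audio), and the accumulated lists go either to the model's own get_mrope_input_positions when supports_mrope(self.model) is true, or to the generic MRotaryEmbedding.get_input_positions_tensor fallback otherwise. Here is a self-contained sketch of just the extraction step with dummy containers; DummyFeature, DummyItem, and the sample grid values are stand-ins for illustration, not vLLM's real mm_features API, and only the dict keys and the walrus-based filtering mirror the diff.

import torch

class DummyItem:
    def __init__(self, data):
        self._data = data

    def get_data(self):
        return self._data

class DummyFeature:
    def __init__(self, data):
        self.data = data

# One 16x16-patch image and one 8-frame video at 0.5 s per temporal grid step.
mm_features = [
    DummyFeature(DummyItem({"image_grid_thw": torch.tensor([1, 16, 16])})),
    DummyFeature(DummyItem({
        "video_grid_thw": torch.tensor([8, 16, 16]),
        "second_per_grid_ts": 0.5,
    })),
]

image_grid_thw, video_grid_thw, second_per_grid_ts = [], [], []
for mm_feature in mm_features:
    mm_item = mm_feature.data
    if mm_item is None:
        continue  # feature carried no data payload
    mm_input = mm_item.get_data()
    if (t := mm_input.get("image_grid_thw")) is not None:
        image_grid_thw.append(t.tolist())
    if (t := mm_input.get("video_grid_thw")) is not None:
        video_grid_thw.append(t.tolist())
    if (t := mm_input.get("second_per_grid_ts")) is not None:
        second_per_grid_ts.append(t)

print(image_grid_thw)      # [[1, 16, 16]]
print(video_grid_thw)      # [[8, 16, 16]]
print(second_per_grid_ts)  # [0.5]

The same lists then flow unchanged into whichever branch of the supports_mrope dispatch applies; the fallback exists so models that do not implement the M-RoPE interface still get valid multimodal positions.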