Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion vllm/v1/engine/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ def __init__(
vllm_config.ec_transfer_config is not None
and vllm_config.ec_transfer_config.is_ec_producer
)
self.is_pooling_model = vllm_config.model_config.runner_type == "pooling"

self.request_block_hasher: Callable[[Request], list[BlockHash]] | None = None
if vllm_config.cache_config.enable_prefix_caching or kv_connector is not None:
Expand Down Expand Up @@ -392,7 +393,7 @@ def step_with_batch_queue(
if not self.ec_producer:
model_executed = scheduler_output.total_num_scheduled_tokens > 0

if not model_executed:
if self.is_pooling_model or not model_executed:
# No sampling required (pooling model, or no requests scheduled).
future = cast(Future[ModelRunnerOutput], exec_future)
else:
Expand Down
8 changes: 4 additions & 4 deletions vllm/v1/executor/ray_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,9 @@ def _init_executor(self) -> None:
# KV connector setup
self.has_connector = self.vllm_config.kv_transfer_config is not None

self.ec_producer = (
self.vllm_config.ec_transfer_config is not None
and self.vllm_config.ec_transfer_config.is_ec_producer
self.uses_sampler = self.vllm_config.model_config.runner_type != "pooling" and (
self.vllm_config.ec_transfer_config is None
or not self.vllm_config.ec_transfer_config.is_ec_producer
)

self.scheduler_output: SchedulerOutput | None = None
Expand Down Expand Up @@ -401,7 +401,7 @@ def execute_model( # type: ignore[override]
"after execute_model() returns None."
)

if self.ec_producer or not scheduler_output.total_num_scheduled_tokens:
if not self.uses_sampler or not scheduler_output.total_num_scheduled_tokens:
# No separate sampling step is needed (sampler unused, or no tokens scheduled); call model runner immediately.
return self._execute_dag(scheduler_output, None, non_block)

Expand Down