Commit a92ec36

fix_attention_free_models
Signed-off-by: Huamin Li <[email protected]>
1 parent: b2e65cb

File tree: 1 file changed (+13, -2)


vllm/v1/worker/gpu_model_runner.py

Lines changed: 13 additions & 2 deletions
@@ -4008,8 +4008,14 @@ def create_attn_groups(
         for attn_backends_map in attention_backend_maps:
             self.attn_groups.append(create_attn_groups(attn_backends_map))
 
-        # Calculate reorder batch threshold (if needed)
-        self.calculate_reorder_batch_threshold()
+        # Calculate reorder batch threshold (if needed).
+        # For attention-free models there will be no attention groups; in that case
+        # there is nothing to reorder.
+        if any(len(groups) for groups in self.attn_groups):
+            self.calculate_reorder_batch_threshold()
+        else:
+            # Disable reordering explicitly to make intent clear to later call sites.
+            self.reorder_batch_threshold = None
 
     def _check_and_update_cudagraph_mode(
         self, attention_backends: set[type[AttentionBackend]]
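Read as code, the first hunk's guard amounts to the minimal sketch below. The RunnerSketch class and its initialize_attn_groups method are illustrative stand-ins, not vLLM's API; the assumption is that attn_groups is a list of per-KV-cache-group lists, which is how the diff's any(len(groups) for groups in self.attn_groups) check reads:

from typing import Optional


class RunnerSketch:
    """Illustrative stand-in for the model runner; not vLLM's API."""

    def __init__(self, attn_groups: list[list[object]]) -> None:
        # One inner list of attention groups per KV cache group; an
        # attention-free model produces no non-empty inner lists.
        self.attn_groups = attn_groups
        self.reorder_batch_threshold: Optional[int] = None

    def calculate_reorder_batch_threshold(self) -> None:
        # Stand-in for the real reduction over backend-reported thresholds.
        self.reorder_batch_threshold = 1

    def initialize_attn_groups(self) -> None:
        # True iff at least one inner list is non-empty, so attention-free
        # models skip the calculation entirely.
        if any(len(groups) for groups in self.attn_groups):
            self.calculate_reorder_batch_threshold()
        else:
            # Disable reordering explicitly so later call sites see None.
            self.reorder_batch_threshold = None


runner = RunnerSketch(attn_groups=[])  # attention-free: no groups at all
runner.initialize_attn_groups()
assert runner.reorder_batch_threshold is None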
@@ -4149,6 +4155,11 @@ def calculate_reorder_batch_threshold(self) -> None:
             group.get_metadata_builder().reorder_batch_threshold
             for group in self._attn_group_iterator()
         ]
+        # If there are no attention groups (attention-free model) or no backend
+        # reports a threshold, leave reordering disabled.
+        if not reorder_batch_thresholds:
+            self.reorder_batch_threshold = None
+            return
         self.reorder_batch_threshold = reduce(min_none_high, reorder_batch_thresholds)
 
     def _find_compatible_block_sizes(
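The early return in the second hunk is what prevents the crash: functools.reduce over an empty iterable with no initial value raises TypeError, which is exactly what an attention-free model (zero attention groups, hence zero reported thresholds) would hit. A small sketch of the failure and the fix; the min_none_high below is a stand-in written on the assumption that vLLM's helper of that name treats None as "no limit":

from functools import reduce
from typing import Optional


def min_none_high(a: Optional[int], b: Optional[int]) -> Optional[int]:
    # Stand-in; assumption: None means "no limit" and ranks high, so it
    # loses to any concrete threshold.
    if a is None:
        return b
    if b is None:
        return a
    return min(a, b)


thresholds: list[Optional[int]] = []  # attention-free model: nothing reported

# Without the guard, the old code path raises:
#   TypeError: reduce() of empty iterable with no initial value
try:
    reduce(min_none_high, thresholds)
except TypeError as exc:
    print(f"unguarded reduce fails: {exc}")

# The fixed code returns early instead, leaving reordering disabled:
reorder_batch_threshold = (
    None if not thresholds else reduce(min_none_high, thresholds)
)
assert reorder_batch_threshold is None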
