@@ -4008,8 +4008,14 @@ def create_attn_groups(
40084008 for attn_backends_map in attention_backend_maps :
40094009 self .attn_groups .append (create_attn_groups (attn_backends_map ))
40104010
4011- # Calculate reorder batch threshold (if needed)
4012- self .calculate_reorder_batch_threshold ()
4011+ # Calculate reorder batch threshold (if needed).
4012+ # For attention-free models there will be no attention groups; in that case
4013+ # there is nothing to reorder.
4014+ if any (len (groups ) for groups in self .attn_groups ):
4015+ self .calculate_reorder_batch_threshold ()
4016+ else :
4017+ # Disable reordering explicitly to make intent clear to later call sites.
4018+ self .reorder_batch_threshold = None
40134019
40144020 def _check_and_update_cudagraph_mode (
40154021 self , attention_backends : set [type [AttentionBackend ]]
@@ -4149,6 +4155,11 @@ def calculate_reorder_batch_threshold(self) -> None:
41494155 group .get_metadata_builder ().reorder_batch_threshold
41504156 for group in self ._attn_group_iterator ()
41514157 ]
4158+ # With no attention groups (attention-free model) the list is empty and
4159+ # reduce() would raise TypeError, so leave reordering disabled.
4160+ if not reorder_batch_thresholds :
4161+ self .reorder_batch_threshold = None
4162+ return
41524163 self .reorder_batch_threshold = reduce (min_none_high , reorder_batch_thresholds )
41534164
41544165 def _find_compatible_block_sizes (
0 commit comments