File tree: 2 files changed (+9 −4 lines)
lines changed Original file line number Diff line number Diff line change 77from torch .nn .functional import scaled_dot_product_attention
88
99from vllm .attention .backends .abstract import (AttentionBackend , AttentionImpl ,
10- AttentionMetadata , AttentionMetadataPerStage )
10+ AttentionMetadata ,
11+ AttentionMetadataPerStage )
1112from vllm .attention .ops .paged_attn import (PagedAttention ,
1213 PagedAttentionMetadata )
1314
@@ -49,7 +50,8 @@ def copy_blocks(
4950
5051
5152@dataclass
52- class TorchSDPAMetadata (AttentionMetadata , PagedAttentionMetadata , AttentionMetadataPerStage ):
53+ class TorchSDPAMetadata (AttentionMetadata , PagedAttentionMetadata ,
54+ AttentionMetadataPerStage ):
5355 """Metadata for TorchSDPABackend.
5456 """
5557 # Currently, input sequences can only contain all prompts
@@ -244,4 +246,4 @@ def _make_sliding_window_bias(
244246 mask = torch .log (mask )
245247 attn_biases .append (mask .to (dtype ))
246248
247- return attn_biases
249+ return attn_biases
Original file line number Diff line number Diff line change @@ -116,13 +116,16 @@ def _verify_and_get_model_config(config: ModelConfig) -> ModelConfig:
116116 config .enforce_eager = True
117117 return config
118118
def _verify_and_get_scheduler_config(
        config: SchedulerConfig) -> SchedulerConfig:
    """Validate and adjust the scheduler config for the CPU backend.

    Chunked prefill is not supported on CPU: if it is enabled, log a
    warning and disable it in place, then return the (possibly mutated)
    config so callers can use the verified settings.
    """
    if config.chunked_prefill_enabled:
        # NOTE: mutates the passed-in config rather than copying it,
        # matching the sibling _verify_and_get_* helpers in this file.
        logger.warning("Chunked prefill is not supported on CPU, disable it.")
        config.chunked_prefill_enabled = False

    return config
125127
128+
126129def _verify_and_get_cache_config (config : CacheConfig ) -> CacheConfig :
127130 _GB = 1 << 30
128131 if config .enable_prefix_caching :
You can’t perform that action at this time.
0 commit comments