We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent cb1b16c, commit f23e28b (Copy full SHA for f23e28b)
vllm/v1/attention/backends/mla/rocm_aiter_mla.py
@@ -80,9 +80,7 @@ class AiterMLAMetadata(MLACommonMetadata[AiterMLADecodeMetadata]):
80
class AiterMLAMetadataBuilder(MLACommonMetadataBuilder[AiterMLAMetadata]):
81
# TODO(luka, lucas): audit this as part of:
82
# https://github.com/vllm-project/vllm/issues/22945
83
- cudagraph_support: ClassVar[AttentionCGSupport] = (
84
- AttentionCGSupport.UNIFORM_SINGLE_TOKEN_DECODE
85
- )
+ cudagraph_support: ClassVar[AttentionCGSupport] = AttentionCGSupport.UNIFORM_BATCH
86
query_len_support: ClassVar[QueryLenSupport] = QueryLenSupport.VARLEN
87
88
def __init__(
0 commit comments