Skip to content

Commit f23e28b

Browse files
committed
cudagraph support to uniform_batch
Signed-off-by: ganyi <[email protected]>
1 parent cb1b16c commit f23e28b

File tree

1 file changed

+1
-3
lines changed

1 file changed

+1
-3
lines changed

vllm/v1/attention/backends/mla/rocm_aiter_mla.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,7 @@ class AiterMLAMetadata(MLACommonMetadata[AiterMLADecodeMetadata]):
80 80      class AiterMLAMetadataBuilder(MLACommonMetadataBuilder[AiterMLAMetadata]):
81 81          # TODO(luka, lucas): audit this as part of:
82 82          # https://github.com/vllm-project/vllm/issues/22945
83    -        cudagraph_support: ClassVar[AttentionCGSupport] = (
84    -            AttentionCGSupport.UNIFORM_SINGLE_TOKEN_DECODE
85    -        )
   83 +        cudagraph_support: ClassVar[AttentionCGSupport] = AttentionCGSupport.UNIFORM_BATCH
86 84          query_len_support: ClassVar[QueryLenSupport] = QueryLenSupport.VARLEN
87 85
88 86          def __init__(

0 commit comments

Comments
 (0)