From a8bc6855d134b46bc11af6c101e13756db36a8ed Mon Sep 17 00:00:00 2001
From: Matthew Bonanni
Date: Thu, 30 Oct 2025 00:46:33 +0000
Subject: [PATCH] change threshold

Signed-off-by: Matthew Bonanni
---
 vllm/v1/attention/backends/mla/flashmla.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/v1/attention/backends/mla/flashmla.py b/vllm/v1/attention/backends/mla/flashmla.py
index 1f98204031ed..bc1730753209 100644
--- a/vllm/v1/attention/backends/mla/flashmla.py
+++ b/vllm/v1/attention/backends/mla/flashmla.py
@@ -71,7 +71,7 @@ class FlashMLAMetadata(MLACommonMetadata[FlashMLADecodeMetadata]):
 class FlashMLAMetadataBuilder(MLACommonMetadataBuilder[FlashMLAMetadata]):
     cudagraph_support: ClassVar[AttentionCGSupport] = AttentionCGSupport.UNIFORM_BATCH
     query_len_support: ClassVar[QueryLenSupport] = QueryLenSupport.UNIFORM
-    reorder_batch_threshold: int = 512  # process small prefills with decode pathway
+    reorder_batch_threshold: int = 128  # process small prefills with decode pathway
     # ^ TODO(matt): tune this
 
     def __init__(
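
Note: for context on what this threshold controls, the sketch below is a minimal, illustrative Python example of how a reorder_batch_threshold of this kind is typically applied; it is not the vLLM implementation. Requests whose scheduled query length is at or below the threshold are grouped with decodes and sent down the decode pathway, while longer prefills keep the prefill pathway. The Request class and reorder_batch helper are hypothetical names introduced only for this illustration.

# Illustrative sketch only -- not the actual vLLM code path.
# The real builder operates on scheduler output and per-request query-length
# tensors; here a simple Request record stands in for that state.
from dataclasses import dataclass


@dataclass
class Request:
    req_id: str
    query_len: int  # number of new tokens scheduled for this request this step


def reorder_batch(requests: list[Request], threshold: int = 128) -> list[Request]:
    """Place 'decode-like' requests (query_len <= threshold) first so they can
    be batched together and run through the decode kernel; longer prefills
    follow and use the prefill pathway."""
    decodes = [r for r in requests if r.query_len <= threshold]
    prefills = [r for r in requests if r.query_len > threshold]
    return decodes + prefills


if __name__ == "__main__":
    batch = [Request("a", 1), Request("b", 900), Request("c", 64)]
    # With threshold=128, "a" and "c" are treated as decodes: ['a', 'c', 'b']
    print([r.req_id for r in reorder_batch(batch)])

Lowering the default from 512 to 128 narrows which prefills are folded into the decode batch; the in-line TODO notes that the value is still to be tuned.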