From bd662532eadcba1897d8c7a4e6d70b32eb278cc3 Mon Sep 17 00:00:00 2001
From: Benjamin Chislett <bchislett@nvidia.com>
Date: Fri, 14 Nov 2025 17:12:38 +0000
Subject: [PATCH] fix chunked local attn

Signed-off-by: Benjamin Chislett <bchislett@nvidia.com>
---
 .../layers/chunked_local_attention.py         | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/vllm/attention/layers/chunked_local_attention.py b/vllm/attention/layers/chunked_local_attention.py
index f144e8435b6c..48fcc6fa736b 100644
--- a/vllm/attention/layers/chunked_local_attention.py
+++ b/vllm/attention/layers/chunked_local_attention.py
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import functools
-from typing import ClassVar
 
 import torch
 
@@ -12,11 +11,16 @@
 from vllm.model_executor.layers.quantization import QuantizationConfig
 from vllm.v1.attention.backends.utils import (
     AttentionCGSupport,
+    AttentionMetadataBuilder,
     CommonAttentionMetadata,
     make_local_attention_virtual_batches,
     subclass_attention_backend,
 )
-from vllm.v1.kv_cache_interface import ChunkedLocalAttentionSpec, KVCacheSpec
+from vllm.v1.kv_cache_interface import (
+    AttentionSpec,
+    ChunkedLocalAttentionSpec,
+    KVCacheSpec,
+)
 
 from ..layer import Attention
 
@@ -30,9 +34,18 @@ def create_chunked_local_attention_backend(
     prefix = f"ChunkedLocalAttention_{attention_chunk_size}_{block_size}_"
 
     underlying_builder = underlying_attn_backend.get_builder_cls()
+    assert issubclass(underlying_builder, AttentionMetadataBuilder)
 
     class ChunkedLocalAttentionBuilder(underlying_builder):  # type: ignore
-        _cudagraph_support: ClassVar[AttentionCGSupport] = AttentionCGSupport.NEVER
+        @classmethod
+        def get_cudagraph_support(
+            cls: type["AttentionMetadataBuilder"],
+            vllm_config: VllmConfig,
+            kv_cache_spec: AttentionSpec,
+        ) -> AttentionCGSupport:
+            # Explicit override in case the underlying builder specialized this getter.
+            # @override omitted only because of mypy limitation due to type variable.
+            return AttentionCGSupport.NEVER
 
         def build(
             self,