1 parent 26422e4 commit 395aa82
vllm/attention/selector.py
@@ -1,4 +1,5 @@
 from functools import lru_cache
+from typing import Type
 
 import torch
 
@@ -10,7 +11,7 @@
 
 
 @lru_cache(maxsize=None)
-def get_attn_backend(dtype: torch.dtype) -> AttentionBackend:
+def get_attn_backend(dtype: torch.dtype) -> Type[AttentionBackend]:
     if _can_use_flash_attn(dtype):
         logger.info("Using FlashAttention backend.")
         from vllm.attention.backends.flash_attn import (  # noqa: F401
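For context, the annotation changes because get_attn_backend returns the backend class itself rather than an instance of it, and a class object is typed as Type[AttentionBackend], not AttentionBackend. Below is a minimal, self-contained sketch of that pattern; the stand-in classes and the use_flash_attn parameter are illustrative only and are not vLLM's actual code.

# Sketch: why a function that returns a backend *class* is annotated
# with Type[AttentionBackend] instead of AttentionBackend.
from functools import lru_cache
from typing import Type


class AttentionBackend:
    """Stand-in for vllm.attention.backends.abstract.AttentionBackend."""


class FlashAttentionBackend(AttentionBackend):
    """Stand-in for a FlashAttention-based backend class."""


class FallbackBackend(AttentionBackend):
    """Stand-in for a non-FlashAttention backend class."""


@lru_cache(maxsize=None)
def get_attn_backend(use_flash_attn: bool) -> Type[AttentionBackend]:
    # The function returns the class object, not an instance,
    # which is exactly what Type[AttentionBackend] expresses.
    if use_flash_attn:
        return FlashAttentionBackend
    return FallbackBackend


backend_cls = get_attn_backend(True)          # a class, not an instance
assert issubclass(backend_cls, AttentionBackend)

Because the selector is wrapped in lru_cache, returning the class (rather than constructing an instance) also keeps the cached value cheap and stateless; callers decide when and how to use the selected backend class.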