Skip to content

Commit 5c2e66e

Browse files
authored
[Bugfix] More type hint fixes for py 3.8 (#4039)
1 parent 546e721 commit 5c2e66e

File tree

4 files changed

+8
-8
lines changed

4 files changed

+8
-8
lines changed

vllm/executor/executor_base.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@ def determine_num_available_blocks(self) -> Tuple[int, int]:
3939
ExecutorBase may require modification of the result, e.g. to ensure the
4040
selected cache sizes are compatible with all workers.
4141
42-
Returns a tuple[num_gpu_blocks, num_cpu_blocks], where num_gpu_blocks
42+
Returns a Tuple[num_gpu_blocks, num_cpu_blocks], where num_gpu_blocks
4343
are blocks that are "active" on the device and can be appended to.
4444
num_cpu_blocks refers to "swapped" blocks in CPU memory and cannot be
4545
appended to.

vllm/worker/cpu_worker.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
"""A CPU worker class."""
2-
from typing import Dict, List, Optional
2+
from typing import Dict, List, Optional, Tuple
33

44
import torch
55
import torch.distributed
@@ -157,7 +157,7 @@ def init_device(self) -> None:
157157
def load_model(self):
158158
self.model_runner.load_model()
159159

160-
def determine_num_available_blocks(self) -> tuple[int, int]:
160+
def determine_num_available_blocks(self) -> Tuple[int, int]:
161161
"""Determine the number of blocks available for the KV cache.
162162
163163
This determines how many KV blocks can fit into the configured CPU

vllm/worker/neuron_worker.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
"""A Neuron worker class."""
2-
from typing import List, Optional
2+
from typing import List, Optional, Tuple
33

44
import torch
55
import torch.distributed
@@ -40,7 +40,7 @@ def init_device(self) -> None:
4040
def load_model(self):
4141
self.model_runner.load_model()
4242

43-
def determine_num_available_blocks(self) -> tuple[int, int]:
43+
def determine_num_available_blocks(self) -> Tuple[int, int]:
4444
"""Determine the number of available KV blocks.
4545
4646
Swapping is not yet supported, so always return num_cpu_blocks=0.

vllm/worker/worker_base.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
from abc import ABC, abstractmethod
2-
from typing import Dict, List
2+
from typing import Dict, List, Tuple
33

44
from vllm.lora.request import LoRARequest
55
from vllm.sequence import SamplerOutput, SequenceGroupMetadata
@@ -18,14 +18,14 @@ def init_device(self) -> None:
1818
raise NotImplementedError
1919

2020
@abstractmethod
21-
def determine_num_available_blocks(self) -> tuple[int, int]:
21+
def determine_num_available_blocks(self) -> Tuple[int, int]:
2222
"""Determine the number of available blocks for the GPU KV cache and
2323
swappable CPU KV cache.
2424
2525
The implementation may run profiling or other heuristics to determine
2626
the size of caches.
2727
28-
Returns a tuple[num_gpu_blocks, num_cpu_blocks], where num_gpu_blocks
28+
Returns a Tuple[num_gpu_blocks, num_cpu_blocks], where num_gpu_blocks
2929
are blocks that are "active" on the device and can be appended to.
3030
num_cpu_blocks refers to "swapped" blocks in CPU memory and cannot be
3131
appended to.

0 commit comments

Comments (0)