Skip to content

Commit 596f40e

Browse files
Guanyu Chen (i26275), Tryorish
authored and committed
[BugFix][Metax][KVCache] fix: resolve None callable error when import fails
1 parent 4b86e2b commit 596f40e

1 file changed

Lines changed: 13 additions & 13 deletions

File tree

  • fastdeploy/cache_manager

fastdeploy/cache_manager/ops.py

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -49,12 +49,6 @@ def get_peer_mem_addr(*args, **kwargs):
4949
raise RuntimeError("CUDA no need of get_peer_mem_addr!")
5050

5151
elif current_platform.is_maca():
52-
from fastdeploy.model_executor.ops.gpu import (
53-
swap_cache_per_layer, # 单层 KV cache 换入算子(同步)
54-
)
55-
from fastdeploy.model_executor.ops.gpu import (
56-
swap_cache_per_layer_async, # 单层 KV cache 换入算子(异步,无强制 sync)
57-
)
5852
from fastdeploy.model_executor.ops.gpu import ( # get_output_kv_signal,; ipc_sent_key_value_cache_by_remote_ptr_block_sync,
5953
cuda_host_alloc,
6054
cuda_host_free,
@@ -72,10 +66,16 @@ def get_peer_mem_addr(*args, **kwargs):
7266
raise RuntimeError("CUDA no need of get_peer_mem_addr!")
7367

7468
def get_output_kv_signal(*args, **kwargs):
75-
raise RuntimeError("Metax get_output_kv_signal UNIMPLENENTED!")
69+
raise RuntimeError("Metax get_output_kv_signal UNIMPLEMENTED!")
7670

7771
def ipc_sent_key_value_cache_by_remote_ptr_block_sync(*args, **kwargs):
78-
raise RuntimeError("Metax ipc_sent_key_value_cache_by_remote_ptr_block_sync UNIMPLENENTED!")
72+
raise RuntimeError("Metax ipc_sent_key_value_cache_by_remote_ptr_block_sync UNIMPLEMENTED!")
73+
74+
def swap_cache_per_layer(*args, **kwargs): # 单层 KV cache 换入算子(同步)
75+
raise RuntimeError("Metax swap_cache_per_layer UNIMPLEMENTED")
76+
77+
def swap_cache_per_layer_async(*args, **kwargs): # 单层 KV cache 换入算子(异步)
78+
raise RuntimeError("Metax swap_cache_per_layer_async UNIMPLEMENTED")
7979

8080
elif current_platform.is_xpu():
8181
from fastdeploy.model_executor.ops.xpu import (
@@ -93,19 +93,19 @@ def ipc_sent_key_value_cache_by_remote_ptr_block_sync(*args, **kwargs):
9393
memory_allocated = paddle.device.xpu.memory_allocated
9494

9595
def get_data_ptr_ipc(*args, **kwargs):
96-
raise RuntimeError("XPU get_data_ptr_ipc UNIMPLENENTED!")
96+
raise RuntimeError("XPU get_data_ptr_ipc UNIMPLEMENTED!")
9797

9898
def ipc_sent_key_value_cache_by_remote_ptr(*args, **kwargs):
99-
raise RuntimeError("XPU ipc_sent_key_value_cache_by_remote_ptr UNIMPLENENTED")
99+
raise RuntimeError("XPU ipc_sent_key_value_cache_by_remote_ptr UNIMPLEMENTED")
100100

101101
def ipc_sent_key_value_cache_by_remote_ptr_block_sync(*args, **kwargs):
102-
raise RuntimeError("XPU No ipc_sent_key_value_cache_by_remote_ptr UNIMPLENENTED")
102+
raise RuntimeError("XPU No ipc_sent_key_value_cache_by_remote_ptr UNIMPLEMENTED")
103103

104104
def swap_cache_per_layer(*args, **kwargs): # 单层 KV cache 换入算子(同步)
105-
raise RuntimeError("XPU swap_cache_per_layer UNIMPLENENTED")
105+
raise RuntimeError("XPU swap_cache_per_layer UNIMPLEMENTED")
106106

107107
def swap_cache_per_layer_async(*args, **kwargs): # 单层 KV cache 换入算子(异步)
108-
raise RuntimeError("XPU swap_cache_per_layer_async UNIMPLENENTED")
108+
raise RuntimeError("XPU swap_cache_per_layer_async UNIMPLEMENTED")
109109

110110
else:
111111
raise RuntimeError("Prefix cache ops only supported CUDA nor XPU platform ")

0 commit comments

Comments
 (0)