@@ -49,12 +49,6 @@ def get_peer_mem_addr(*args, **kwargs):
4949 raise RuntimeError ("CUDA no need of get_peer_mem_addr!" )
5050
5151 elif current_platform .is_maca ():
52- from fastdeploy .model_executor .ops .gpu import (
53- swap_cache_per_layer , # 单层 KV cache 换入算子(同步)
54- )
55- from fastdeploy .model_executor .ops .gpu import (
56- swap_cache_per_layer_async , # 单层 KV cache 换入算子(异步,无强制 sync)
57- )
5852 from fastdeploy .model_executor .ops .gpu import ( # get_output_kv_signal,; ipc_sent_key_value_cache_by_remote_ptr_block_sync,
5953 cuda_host_alloc ,
6054 cuda_host_free ,
@@ -72,10 +66,16 @@ def get_peer_mem_addr(*args, **kwargs):
7266 raise RuntimeError ("CUDA no need of get_peer_mem_addr!" )
7367
def get_output_kv_signal(*args, **kwargs):
    """Placeholder for the ``get_output_kv_signal`` op on Metax.

    Accepts any positional or keyword arguments and ignores them.

    Raises:
        RuntimeError: always, reporting that the op is unimplemented.
    """
    reason = "Metax get_output_kv_signal UNIMPLEMENTED !"
    raise RuntimeError(reason)
7670
def ipc_sent_key_value_cache_by_remote_ptr_block_sync(*args, **kwargs):
    """Placeholder for the block-sync IPC KV-cache send op on Metax.

    Accepts any positional or keyword arguments and ignores them.

    Raises:
        RuntimeError: always, reporting that the op is unimplemented.
    """
    reason = "Metax ipc_sent_key_value_cache_by_remote_ptr_block_sync UNIMPLEMENTED!"
    raise RuntimeError(reason)
73+
def swap_cache_per_layer(*args, **kwargs):
    """Placeholder for the single-layer KV cache swap-in op (synchronous) on Metax.

    Accepts any positional or keyword arguments and ignores them.

    Raises:
        RuntimeError: always, reporting that the op is unimplemented.
    """
    reason = "Metax swap_cache_per_layer UNIMPLEMENTED"
    raise RuntimeError(reason)
76+
def swap_cache_per_layer_async(*args, **kwargs):
    """Placeholder for the single-layer KV cache swap-in op (asynchronous) on Metax.

    Accepts any positional or keyword arguments and ignores them.

    Raises:
        RuntimeError: always, reporting that the op is unimplemented.
    """
    reason = "Metax swap_cache_per_layer_async UNIMPLEMENTED"
    raise RuntimeError(reason)
7979
8080 elif current_platform .is_xpu ():
8181 from fastdeploy .model_executor .ops .xpu import (
@@ -93,19 +93,19 @@ def ipc_sent_key_value_cache_by_remote_ptr_block_sync(*args, **kwargs):
9393 memory_allocated = paddle .device .xpu .memory_allocated
9494
def get_data_ptr_ipc(*args, **kwargs):
    """Placeholder for the ``get_data_ptr_ipc`` op on XPU.

    Accepts any positional or keyword arguments and ignores them.

    Raises:
        RuntimeError: always, reporting that the op is unimplemented.
    """
    reason = "XPU get_data_ptr_ipc UNIMPLEMENTED !"
    raise RuntimeError(reason)
9797
def ipc_sent_key_value_cache_by_remote_ptr(*args, **kwargs):
    """Placeholder for the IPC KV-cache send-by-remote-pointer op on XPU.

    Accepts any positional or keyword arguments and ignores them.

    Raises:
        RuntimeError: always, reporting that the op is unimplemented.
    """
    reason = "XPU ipc_sent_key_value_cache_by_remote_ptr UNIMPLEMENTED "
    raise RuntimeError(reason)
100100
def ipc_sent_key_value_cache_by_remote_ptr_block_sync(*args, **kwargs):
    """Placeholder for the block-sync IPC KV-cache send op on XPU.

    Accepts any positional or keyword arguments and ignores them.

    Raises:
        RuntimeError: always, reporting that the op is unimplemented.
    """
    # The message previously named the sibling op
    # ``ipc_sent_key_value_cache_by_remote_ptr`` (with a stray "No"), which
    # pointed anyone reading the error at the wrong function. Report this
    # stub's own name instead.
    raise RuntimeError("XPU ipc_sent_key_value_cache_by_remote_ptr_block_sync UNIMPLEMENTED ")
103103
def swap_cache_per_layer(*args, **kwargs):
    """Placeholder for the single-layer KV cache swap-in op (synchronous) on XPU.

    Accepts any positional or keyword arguments and ignores them.

    Raises:
        RuntimeError: always, reporting that the op is unimplemented.
    """
    reason = "XPU swap_cache_per_layer UNIMPLEMENTED "
    raise RuntimeError(reason)
106106
def swap_cache_per_layer_async(*args, **kwargs):
    """Placeholder for the single-layer KV cache swap-in op (asynchronous) on XPU.

    Accepts any positional or keyword arguments and ignores them.

    Raises:
        RuntimeError: always, reporting that the op is unimplemented.
    """
    reason = "XPU swap_cache_per_layer_async UNIMPLEMENTED "
    raise RuntimeError(reason)
109109
110110 else :
111111 raise RuntimeError ("Prefix cache ops only supported CUDA nor XPU platform " )
0 commit comments