@@ -1272,66 +1272,6 @@ def hash_block_features(self, input_ids, extra_keys: list = []):
12721272 """
12731273 return hashlib .sha256 (pickle .dumps ((input_ids , extra_keys ))).hexdigest ()
12741274
def _revert_match_blocks(
    self,
    request,
    matched_token_num: int,
    block_size: int,
    chunk_idx: int,
    match_node_ids: list,
    matche_nodes: list,
    match_gpu_block_ids: list,
    match_cpu_block_ids: list,
    gpu_match_token_num: int,
    cpu_match_token_num: int,
    swap_node_ids: list,
):
    """
    Roll back previously matched cache blocks, starting from the last matched node.

    Currently every matched token is reverted (see the TODO below), so the
    matched nodes are popped from the tail one block at a time and the
    bookkeeping lists are updated in place.

    Args:
        request: Request being matched; only ``request.request_id`` is read (for logging).
        matched_token_num: Number of matched tokens to revert.
        block_size: Number of tokens accounted for by one block.
        chunk_idx: Unused for now; kept so existing callers keep working.
        match_node_ids: Node ids of the matched nodes. Mutated in place.
        matche_nodes: Matched nodes in match order (each exposes ``block_id``
            and ``node_id``). Mutated in place.
        match_gpu_block_ids: Block ids counted as GPU matches. Mutated in place.
        match_cpu_block_ids: Block ids counted as CPU matches. Mutated in place.
        gpu_match_token_num: Token count currently attributed to GPU matches.
        cpu_match_token_num: Token count currently attributed to CPU matches.
        swap_node_ids: Ids scheduled for swapping. Mutated in place.

    Returns:
        tuple: ``(gpu_match_token_num, cpu_match_token_num, current_node)`` where
        ``current_node`` is the last remaining matched node, or
        ``self.radix_tree_root`` when no matched node remains.
    """
    # TODO(chengyanfu): fix when is_chunked_mm_input=True, revert all matched tokens.
    # The intended partial revert is
    #   matched_token_num - request.multimodal_inputs["mm_positions"][chunk_idx].offset
    # but until that is implemented everything that was matched is reverted.
    revert_tokens = matched_token_num
    match_block_ids = [node.block_id for node in matche_nodes]
    logger.warning(
        f"match_block: req_id {request.request_id} revert tokens: {revert_tokens} "
        f"from matched nodes: {match_block_ids}"
    )

    # Revert whole blocks from the tail of the matched path.
    while revert_tokens >= block_size:
        if not matche_nodes:
            logger.error(f"req_id {request.request_id} revert nodes error, tokens: {revert_tokens}")
            break
        revert_tokens -= block_size
        revert_block = matche_nodes.pop()
        revert_block_id = revert_block.block_id
        if revert_block_id in match_gpu_block_ids:
            match_gpu_block_ids.remove(revert_block_id)
            match_node_ids.remove(revert_block.node_id)
            gpu_match_token_num -= block_size
        elif revert_block_id in match_cpu_block_ids:
            match_cpu_block_ids.remove(revert_block_id)
            match_node_ids.remove(revert_block.node_id)
            cpu_match_token_num -= block_size
        else:
            logger.error(
                f"req_id {request.request_id} revert nodes error, nodes: {revert_block_id}, "
                f"match_gpu_block_ids: {match_gpu_block_ids}, match_cpu_block_ids: {match_cpu_block_ids}"
            )
            break
        # NOTE(review): this compares a block id against a list whose name suggests
        # node ids -- confirm against the caller which id kind swap_node_ids holds.
        if revert_block_id in swap_node_ids:
            swap_node_ids.remove(revert_block_id)

    # Charge the leftover (less than one block, or whatever remained after an
    # early break) to the last surviving node.
    if revert_tokens > 0:
        if not matche_nodes:
            # Previously this path indexed matche_nodes[-1] on an empty list and
            # raised IndexError; there is no node left to charge, so only report it.
            logger.error(
                f"req_id {request.request_id} revert nodes error, revert_tokens: {revert_tokens}, "
                f"no matched node left"
            )
        else:
            last_block_id = matche_nodes[-1].block_id
            if last_block_id in match_gpu_block_ids:
                gpu_match_token_num -= revert_tokens
            elif last_block_id in match_cpu_block_ids:
                cpu_match_token_num -= revert_tokens
            else:
                logger.error(
                    f"req_id {request.request_id} revert nodes error, revert_tokens: {revert_tokens}, "
                    f"nodes: {last_block_id}, "
                    f"match_gpu_block_ids: {match_gpu_block_ids}, match_cpu_block_ids: {match_cpu_block_ids}"
                )

    current_node = matche_nodes[-1] if matche_nodes else self.radix_tree_root
    return gpu_match_token_num, cpu_match_token_num, current_node
1334-
13351275 def mm_match_block (self , request , block_size ):
13361276 """
13371277 Match and retrieve cached blocks for multimodal requests using a radix tree structure.
@@ -1420,28 +1360,6 @@ def mm_match_block(self, request, block_size):
14201360 if has_modified_cpu_lru_leaf_heap :
14211361 heapq .heapify (self .cpu_lru_leaf_heap )
14221362
1423- if self .cache_config .disable_chunked_mm_input :
1424- matched_token_num = gpu_match_token_num + cpu_match_token_num
1425- is_chunked , chunk_idx = self .is_chunked_mm_input (request .multimodal_inputs , matched_token_num )
1426- if is_chunked :
1427- (
1428- gpu_match_token_num ,
1429- cpu_match_token_num ,
1430- current_match_node ,
1431- ) = self ._revert_match_blocks (
1432- request = request ,
1433- matched_token_num = matched_token_num ,
1434- block_size = block_size ,
1435- chunk_idx = chunk_idx ,
1436- match_node_ids = match_node_ids ,
1437- matche_nodes = matche_nodes ,
1438- match_gpu_block_ids = match_gpu_block_ids ,
1439- match_cpu_block_ids = match_cpu_block_ids ,
1440- gpu_match_token_num = gpu_match_token_num ,
1441- cpu_match_token_num = cpu_match_token_num ,
1442- swap_node_ids = swap_node_ids ,
1443- )
1444-
14451363 logger .info (f"match_block: req_id { request .request_id } matched nodes: { match_node_ids } " )
14461364 return (
14471365 match_gpu_block_ids ,
0 commit comments