3434from fastdeploy .input .preprocess import InputPreprocessor
3535from fastdeploy .inter_communicator import (
3636 IPCSignal ,
37- KVCacheStatus ,
3837 ModelWeightsStatus ,
3938 PrefixTreeStatus ,
4039 RearrangeExpertStatus ,
@@ -529,6 +528,19 @@ def update_model_weight(self, timeout=300):
529528 2 : worker update finish and notify client
530529 """
531530 with self .clear_update_lock :
531+ if self .fd_config .cache_config .swap_space :
532+ return False , "hierarchical cache updating is not supported"
533+
534+ if self .enable_prefix_caching :
535+ # prefix_tree_status_signal: CLEARED -> UPDATING -> NORMAL
536+ if self .prefix_tree_status_signal .value [0 ] == PrefixTreeStatus .CLEARED :
537+ self .prefix_tree_status_signal .value [0 ] = PrefixTreeStatus .UPDATING
538+ api_server_logger .info (f"Start to update prefix tree { self .prefix_tree_status_signal .value [0 ]} " )
539+ while self .prefix_tree_status_signal .value [0 ] != PrefixTreeStatus .NORMAL :
540+ api_server_logger .info (f"..updating prefix tree { self .prefix_tree_status_signal .value [0 ]} " )
541+ time .sleep (1 )
542+
543+ # model_weights_status_signal: CLEARED -> UPDATING -> NORMAL
532544 if self .model_weights_status_signal .value [0 ] == ModelWeightsStatus .NORMAL :
533545 return True , ""
534546 if self .model_weights_status_signal .value [0 ] == ModelWeightsStatus .UPDATING :
@@ -537,34 +549,13 @@ def update_model_weight(self, timeout=300):
537549 return False , "worker is clearing model weight, cannot update now"
538550
539551 self .model_weights_status_signal .value [0 ] = ModelWeightsStatus .UPDATING
540- if self .enable_prefix_caching or self .enable_splitwise :
541- self .kv_cache_status_signal .value [0 ] = KVCacheStatus .UPDATING
542- if self .enable_prefix_caching :
543- self .prefix_tree_status_signal .value [0 ] = PrefixTreeStatus .UPDATING
544- api_server_logger .info (f"start update model weight { self .model_weights_status_signal .value } " )
545- all_updated = False
546- while timeout >= 0 and not all_updated :
547- api_server_logger .info (
548- f"Updating model weights.. "
549- f"model_weights_status: { self .model_weights_status_signal .value [0 ]} , "
550- f"prefix_tree_status: { self .prefix_tree_status_signal .value [0 ]} , "
551- f"kv_cache_status: { self .kv_cache_status_signal .value [0 ]} "
552- )
553- weight_updated = self .model_weights_status_signal .value [0 ] == ModelWeightsStatus .NORMAL
554- cache_updated = self .kv_cache_status_signal .value [0 ] == KVCacheStatus .NORMAL
555- prefix_updated = self .prefix_tree_status_signal .value [0 ] == PrefixTreeStatus .NORMAL
556- if self .enable_prefix_caching or self .enable_splitwise :
557- if self .enable_prefix_caching :
558- all_updated = weight_updated and cache_updated and prefix_updated
559- else :
560- all_updated = weight_updated and cache_updated
561- else :
562- all_updated = weight_updated
552+ api_server_logger .info (f"Start to update model weight { self .model_weights_status_signal .value [0 ]} " )
553+ while timeout >= 0 and self .model_weights_status_signal .value [0 ] != ModelWeightsStatus .NORMAL :
554+ api_server_logger .info (f"..updating model weights { self .model_weights_status_signal .value [0 ]} " )
563555 time .sleep (1 )
564556 timeout -= 1
565557 if timeout < 0 :
566558 return False , "Update model weight timeout"
567- time .sleep (1 )
568559 return True , ""
569560
570561 def clear_load_weight (self , timeout = 300 ):
@@ -575,6 +566,19 @@ def clear_load_weight(self, timeout=300):
575566 """
576567
577568 with self .clear_update_lock :
569+ if self .fd_config .cache_config .swap_space :
570+ return False , "hierarchical cache clearing is not supported"
571+
572+ if self .enable_prefix_caching :
573+ # prefix_tree_status_signal: NORMAL -> CLEARING -> CLEARED
574+ if self .prefix_tree_status_signal .value [0 ] == PrefixTreeStatus .NORMAL :
575+ self .prefix_tree_status_signal .value [0 ] = PrefixTreeStatus .CLEARING
576+ api_server_logger .info (f"Start to clear prefix tree { self .prefix_tree_status_signal .value [0 ]} " )
577+ while self .prefix_tree_status_signal .value [0 ] != PrefixTreeStatus .CLEARED :
578+ api_server_logger .info (f"..clearing prefix tree { self .prefix_tree_status_signal .value [0 ]} " )
579+ time .sleep (1 )
580+
581+ # model_weights_status_signal: NORMAL -> CLEARING -> CLEARED
578582 if self .model_weights_status_signal .value [0 ] == ModelWeightsStatus .CLEARED :
579583 return True , ""
580584 if self .model_weights_status_signal .value [0 ] == ModelWeightsStatus .CLEARING :
@@ -583,36 +587,13 @@ def clear_load_weight(self, timeout=300):
583587 return False , "worker is updating model weight, cannot clear now"
584588
585589 self .model_weights_status_signal .value [0 ] = ModelWeightsStatus .CLEARING
586- if self .enable_prefix_caching or self .enable_splitwise :
587- self .kv_cache_status_signal .value [0 ] = KVCacheStatus .CLEARING
588- if self .enable_prefix_caching :
589- self .prefix_tree_status_signal .value [0 ] = PrefixTreeStatus .CLEARING
590-
591- api_server_logger .info (f"start clear model weight { self .model_weights_status_signal .value } " )
592- all_cleared = False
593- while timeout >= 0 and not all_cleared :
594- api_server_logger .info (
595- f"Clearing model weights.. "
596- f"model_weights_status: { self .model_weights_status_signal .value [0 ]} , "
597- f"prefix_tree_status: { self .prefix_tree_status_signal .value [0 ]} , "
598- f"kv_cache_status: { self .kv_cache_status_signal .value [0 ]} "
599- )
600- weight_cleared = self .model_weights_status_signal .value [0 ] == ModelWeightsStatus .CLEARED
601- cache_cleared = self .kv_cache_status_signal .value [0 ] == KVCacheStatus .CLEARED
602- prefix_cleared = self .prefix_tree_status_signal .value [0 ] == PrefixTreeStatus .CLEARED
603- if self .enable_prefix_caching or self .enable_splitwise :
604- if self .enable_prefix_caching :
605- all_cleared = weight_cleared and cache_cleared and prefix_cleared
606- else :
607- all_cleared = weight_cleared and cache_cleared
608- else :
609- all_cleared = weight_cleared
590+ api_server_logger .info (f"Start to clear model weight { self .model_weights_status_signal .value [0 ]} " )
591+ while timeout >= 0 and self .model_weights_status_signal .value [0 ] != ModelWeightsStatus .CLEARED :
592+ api_server_logger .info (f"..clearing model weights { self .model_weights_status_signal .value [0 ]} " )
610593 time .sleep (1 )
611594 timeout -= 1
612-
613595 if timeout < 0 :
614596 return False , "Clear model weight timeout"
615- time .sleep (1 )
616597 return True , ""
617598
618599 def check_model_weight_status (self ):
0 commit comments