We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 3f929af · commit ee65422 — Copy full SHA for ee65422
verl/workers/sharding_manager/fsdp_sglang.py
@@ -173,7 +173,11 @@ async def wake_up(self):
173
get_torch_device().empty_cache()
174
log_gpu_memory_usage("After del state_dict and empty_cache in sharding manager", logger=logger)
175
176
- if self.multi_stage_wake_up and self.rollout_config.free_cache_engine:
+ if (
177
+ self.multi_stage_wake_up
178
+ and self.rollout_config.free_cache_engine
179
+ and self.device_mesh["infer_tp"].get_local_rank() == 0
180
+ ):
181
await self.inference_engine.resume_memory_occupation(tags=["kv_cache"])
182
log_gpu_memory_usage("After resume SGLang kv_cache in sharding manager", logger=logger)
183
0 commit comments