@@ -48,6 +48,21 @@ def _init_executor(self) -> None:
4848 if USE_RAY_COMPILED_DAG :
4949 self .forward_dag = self ._compiled_ray_dag ()
5050
def _configure_ray_workers_use_nsight(self,
                                      ray_remote_kwargs) -> Dict[str, Any]:
    """Add an Nsight profiling section to the Ray workers' runtime env.

    When nsight profiling is enabled, the profiling configuration has to
    reach the ray workers through their ``runtime_env``. This helper stores
    that configuration under the ``"nsight"`` key of
    ``ray_remote_kwargs["runtime_env"]``, creating the ``runtime_env``
    mapping if the caller did not supply one.

    Args:
        ray_remote_kwargs: Keyword arguments destined for the Ray remote
            call. The mapping (and any ``runtime_env`` mapping already
            inside it) is modified in place; an existing ``"nsight"``
            entry is replaced.

    Returns:
        The same ``ray_remote_kwargs`` object that was passed in.
    """
    # NOTE(review): these keys look like Nsight Systems CLI options
    # (trace targets, output name, CUDA graph tracing) -- confirm against
    # the Ray/Nsight documentation before changing them.
    nsight_options = {
        "t": "cuda,cudnn,cublas",
        "o": "'worker_process_%p'",
        "cuda-graph-trace": "node",
    }

    # Reuse the caller's runtime_env when present so its other settings
    # are kept; only the "nsight" entry is (over)written.
    ray_remote_kwargs.setdefault("runtime_env", {}).update(
        {"nsight": nsight_options})

    return ray_remote_kwargs
65+
5166 def _init_workers_ray (self , placement_group : "PlacementGroup" ,
5267 ** ray_remote_kwargs ):
5368 if self .parallel_config .tensor_parallel_size == 1 :
@@ -63,6 +78,10 @@ def _init_workers_ray(self, placement_group: "PlacementGroup",
6378 # The remaining workers are the actual ray actors.
6479 self .workers : List [RayWorkerVllm ] = []
6580
81+ if self .parallel_config .ray_workers_use_nsight :
82+ ray_remote_kwargs = self ._configure_ray_workers_use_nsight (
83+ ray_remote_kwargs )
84+
6685 # Create the workers.
6786 driver_ip = get_ip ()
6887 for bundle_id , bundle in enumerate (placement_group .bundle_specs ):
0 commit comments