volcengine · vermouth1992 · Jul 25, 2025 · Jul 24, 2025 · gemini-code-assist · Jul 25, 2025
@@ -1,7 +1,7 @@
 在昇腾设备上基于FSDP后端进行数据采集
 ====================================
 
-Last updated: 07/14/2025.
+Last updated: 07/24/2025.
 
 这是一份在昇腾设备上基于FSDP后端使用GRPO或DAPO算法进行数据采集的教程。
 
@@ -32,6 +32,14 @@ Last updated: 07/14/2025.
 通过 npu_profile.yaml 中的参数控制具体采集行为：
 
 -  save_path：采集数据的存放路径
+-  roles：采集的角色，下列为可选项
+
+   -  rollout_generate：采集rollout的generate_sequences阶段
+   -  actor_compute_log_prob：采集actor的compute_log_prob阶段
+   -  actor_update：采集actor的update_actor阶段
+   -  ref_compute_log_prob：采集ref的compute_ref_log_prob阶段
+   -  all：采集以上所有阶段
+
 -  level：采集等级，可选项为level_none、level0、level1和level2
 
    -  level_none：不采集所有Level层级控制的数据，即关闭profiler_level
@@ -86,6 +94,23 @@ Last updated: 07/14/2025.
                 ranks: [0, 1]
 
 
+离散模式采集actor
+~~~~~~~~~~~~~~~~~~
+
+.. code:: yaml
+
+       trainer:
+           profile_steps: [1, 2, 5]
+           npu_profile:
+                options:
+                    roles: ["actor_compute_log_prob", "actor_update"]
+       actor_rollout_ref:
+            profiler:
+                discrete: True
+                all_ranks: False
+                ranks: [0, 1]
+
+
 可视化
 ------
 

@@ -1,7 +1,7 @@
 Data collection based on FSDP (Fully Sharded Data Parallel) backend on Ascend devices(NPU)
 ==========================================================================================
 
-Last updated: 07/14/2025.
+Last updated: 07/24/2025.
 
 This is a tutorial for data collection using the GRPO or DAPO algorithm
 based on FSDP on Ascend devices.
@@ -35,6 +35,17 @@ and steps.
 Use parameters in npu_profile.yaml to control collection behavior:
 
 -  save_path: Storage path for collected data.
+-  roles: Roles to collect. The following options are available:
+
+   -  rollout_generate: Collect the ``generate_sequences`` phase
+      of the rollout worker.
+   -  actor_compute_log_prob: Collect the ``compute_log_prob`` phase
+      of the actor worker.
+   -  actor_update: Collect the ``update_actor`` phase of the actor worker.
+   -  ref_compute_log_prob: Collect the ``compute_ref_log_prob`` phase
+      of the ref worker.
+   -  all: Collect all of the above phases.
+
 -  level: Collection level—options are level_none, level0, level1, and
    level2
 
@@ -94,6 +105,23 @@ Discrete Mode Collection
                 ranks: [0, 1]
 
 
+Enable actor collection in discrete mode
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code:: yaml
+
+       trainer:
+           profile_steps: [1, 2, 5]
+           npu_profile:
+                options:
+                    roles: ["actor_compute_log_prob", "actor_update"]
+       actor_rollout_ref:
+            profiler:
+                discrete: True
+                all_ranks: False
+                ranks: [0, 1]
+
+
 Visualization
 -------------
 

@@ -16,6 +16,7 @@ WITH_CPU=True
 WITH_MODULE=False
 WITH_STACK=False
 ANALYSIS=True
+ROLES=["all"]
 
 python3 -m verl.trainer.main_ppo \
     algorithm.adv_estimator=grpo \
@@ -59,6 +60,7 @@ python3 -m verl.trainer.main_ppo \
     trainer.npu_profile.options.with_module=$WITH_MODULE \
     trainer.npu_profile.options.with_stack=$WITH_STACK \
     trainer.npu_profile.options.analysis=$ANALYSIS \
+    trainer.npu_profile.options.roles=$ROLES \
     trainer.critic_warmup=0 \
     trainer.logger=console \
     trainer.project_name='verl_grpo_example_gsm8k' \

@@ -453,6 +453,7 @@ trainer:
   npu_profile:
     options:
       save_path: ./profiler_data
+      roles: ["all"]
       level: level1
       with_memory: False
       record_shapes: False

@@ -998,6 +998,11 @@ trainer:
       # Storage path of collected data.
       save_path: ./profiler_data
 
+      # The roles that will be profiled. Only takes effect in discrete mode.
+      # Optional values: all, rollout_generate, actor_compute_log_prob, actor_update, and ref_compute_log_prob.
+      # "all" means all roles will be profiled.
+      roles: ["all"]
+
       # Collection level, optional values: level_none, level0, level1, level2.
       level: level1
 

@@ -207,6 +207,8 @@ trainer:
   npu_profile:
     options:
       save_path: ./profiler_data
+      roles:
+      - all
       level: level1
       with_memory: false
       record_shapes: false

@@ -175,6 +175,8 @@ trainer:
   npu_profile:
     options:
       save_path: ./profiler_data
+      roles:
+      - all
       level: level1
       with_memory: false
       record_shapes: false

@@ -4,6 +4,11 @@ options:
   # Storage path of collected data.
   save_path: ./profiler_data
 
+  # The roles that will be profiled. Only takes effect in discrete mode.
+  # Optional values: all, rollout_generate, actor_compute_log_prob, actor_update, and ref_compute_log_prob.
+  # "all" means all roles will be profiled.
+  roles: ["all"]
+
   # Collection level, optional values: level_none, level0, level1, level2.
   level: level1
 

diff --git a/verl/utils/profiler/mstx_profile.py b/verl/utils/profiler/mstx_profile.py
@@ -202,20 +202,36 @@ def decorator(func):
             @functools.wraps(func)
             def wrapper(self, *args, **kwargs):
                 profile_name = message or func.__name__
-
-                if self.profiler.this_step and self.profile_option is not None:
-                    if self.profiler.discrete:
-                        profile_npu = get_npu_profiler(option=self.profile_option, role=role)
-                        profile_npu.start()
-                    mark_range = mark_start_range(message=profile_name)
+                profile_this_role = True
+                discrete_mode = self.profiler.discrete
+                profile_enable = self.profiler.this_step and self.profile_option is not None
+
+                if not profile_enable:
+                    return func(self, *args, **kwargs)
+
+                if profile_enable and role is not None:
+                    target_roles = self.profile_option.get("roles", [])
+                    profile_this_role = "all" in target_roles or role in target_roles
+
+                if profile_enable:
+                    if not discrete_mode:
+                        mark_range = mark_start_range(message=profile_name)
+                    else:
+                        if profile_this_role:
+                            profile_npu = get_npu_profiler(option=self.profile_option, role=role)
+                            profile_npu.start()
+                            mark_range = mark_start_range(message=profile_name)
 
                 result = func(self, *args, **kwargs)
 
-                if self.profiler.this_step and self.profile_option is not None:
-                    mark_end_range(mark_range)
-                    if self.profiler.discrete:
-                        profile_npu.step()
-                        profile_npu.stop()
+                if profile_enable:
+                    if not discrete_mode:
+                        mark_end_range(mark_range)
+                    else:
+                        if profile_this_role:
+                            mark_end_range(mark_range)
+                            profile_npu.step()
+                            profile_npu.stop()
 
                 return result