```diff
@@ -520,13 +520,13 @@ def update_actor(self, data: DataProto):
         metrics["actor/lr"] = lr
         self.lr_scheduler.step()

-        # Metrics should be in non_tensor_batch instead of meta_info, as DataProto not concat meta_info.
+        # Metrics should be in non_tensor_batch instead of meta_info, as DataProto not concat meta_info
         output = DataProto(
             non_tensor_batch={
                 key: np.array([value] if np.isscalar(value) else value) for key, value in metrics.items()
             }
         )
-        output = self.ulysses_sharding_manager.postprocess_data(data=output)
+        # Metrics do not need post processing since their batch size is 1

         if self._use_param_offload:
             offload_fsdp_model(self.fsdp_module)
```
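For readers skimming the hunk above: the dict comprehension wraps scalar metrics into length-1 NumPy arrays so that every `non_tensor_batch` entry is array-valued and can be concatenated downstream. Here is a minimal standalone sketch of that pattern, using plain dicts instead of `DataProto`; the `wrap_metrics` helper and the sample metric values are illustrative only, not part of the change:

```python
import numpy as np

def wrap_metrics(metrics: dict) -> dict:
    # Scalars become length-1 arrays; sequences go through np.array as-is,
    # so every non_tensor_batch value ends up array-valued.
    return {
        key: np.array([value] if np.isscalar(value) else value)
        for key, value in metrics.items()
    }

wrapped = wrap_metrics({"actor/lr": 1e-5, "actor/grad_norm": [0.7, 0.9]})
assert wrapped["actor/lr"].shape == (1,)
assert wrapped["actor/grad_norm"].shape == (2,)
```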
```diff
@@ -677,13 +677,13 @@ def update_critic(self, data: DataProto):
         lr = self.lr_scheduler.get_last_lr()[0]
         metrics["critic/lr"] = lr

-        # Metrics should be in non_tensor_batch instead of meta_info, as DataProto not concat meta_info.
+        # Metrics should be in non_tensor_batch instead of meta_info, as DataProto not concat meta_info
         output = DataProto(
             non_tensor_batch={
-                metric: np.array([value] if np.isscalar(value) else value) for metric, value in metrics.items()
+                key: np.array([value] if np.isscalar(value) else value) for key, value in metrics.items()
             }
         )
-        data = self.ulysses_sharding_manager.postprocess_data(data=output)
+        # Metrics do not need post processing since their batch size is 1

         if self._use_param_offload:
             offload_fsdp_model(self.fsdp_module)
```
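Two observations on the critic hunk: the comprehension's loop variable is renamed from `metric` to `key`, matching the actor version, and the removed call assigned its result to `data` rather than `output`, which suggests the post-processed metrics were likely being discarded anyway. The rationale stated in the comment (`DataProto` does not concatenate `meta_info`) can be illustrated with a toy merge over plain dicts; `toy_concat` below is a hypothetical stand-in that loosely mirrors that stated behavior, not verl's actual implementation:

```python
import numpy as np

def toy_concat(protos: list) -> dict:
    # non_tensor_batch entries are concatenated per key, but meta_info is
    # NOT merged: only the first dict's meta_info survives, so any metric
    # stored there would silently disappear after concatenation.
    keys = protos[0]["non_tensor_batch"].keys()
    return {
        "non_tensor_batch": {
            k: np.concatenate([p["non_tensor_batch"][k] for p in protos])
            for k in keys
        },
        "meta_info": protos[0]["meta_info"],
    }

a = {"non_tensor_batch": {"critic/lr": np.array([1e-5])}, "meta_info": {"rank": 0}}
b = {"non_tensor_batch": {"critic/lr": np.array([2e-5])}, "meta_info": {"rank": 1}}
merged = toy_concat([a, b])
assert merged["non_tensor_batch"]["critic/lr"].shape == (2,)
assert merged["meta_info"] == {"rank": 0}  # b's meta_info was dropped
```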