diff --git a/d3rlpy/preprocessing/reward_scalers.py b/d3rlpy/preprocessing/reward_scalers.py index 175f3e17..1a21fe13 100644 --- a/d3rlpy/preprocessing/reward_scalers.py +++ b/d3rlpy/preprocessing/reward_scalers.py @@ -297,8 +297,9 @@ def fit_with_trajectory_slicer( ).rewards for episode in episodes ] - self.mean = float(np.mean(rewards)) - self.std = float(np.std(rewards)) + flat_rewards = np.concatenate(rewards) + self.mean = float(np.mean(flat_rewards)) + self.std = float(np.std(flat_rewards)) def transform(self, x: torch.Tensor) -> torch.Tensor: assert self.built