We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent d275418 commit 97830a3 (Copy full SHA for 97830a3)
trl/trainer/ppo_trainer.py
@@ -563,7 +563,7 @@ def repeat_generator():
563
rewards = non_score_reward.clone()
564
actual_start = torch.arange(rewards.size(0), device=rewards.device)
565
actual_end = torch.where(sequence_lengths_p1 < rewards.size(1), sequence_lengths_p1, sequence_lengths)
566
- rewards[[actual_start, actual_end]] += scores
+ rewards[actual_start, actual_end] += scores
567
568
# 5. whiten rewards
569
if args.whiten_rewards:
0 commit comments