Commit 88b5bc7

techkang, gemini-code-assist[bot], vermouth1992, and ccclyu authored and committed
[misc] feat: remove redundant default params (volcengine#3577)
### What does this PR do?

This PR introduces two changes:

1. Removal of redundant default parameters: default optimizer values are already set in the .yaml configuration file. Defining them again in other files is redundant and can cause confusion for users.
2. Alignment of warm-up step logic: the condition changed from `num_warmup_steps < 0` to `num_warmup_steps <= 0`. This aligns the code with the documentation in the YAML file and matches the implementation in Megatron (see https://github.com/volcengine/verl/blob/main/verl/trainer/config/actor/actor.yaml#L132).

---

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Chi Zhang <[email protected]>
Co-authored-by: Changlong Yu <[email protected]>
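As a quick illustration of point 1 (not verl code; the `0.1` fallback below is a deliberately stale, made-up value), a code-level `.get()` default can silently diverge from the documented YAML default:

```python
# Hypothetical config dict: the YAML documents weight_decay = 0.01,
# but this user-provided dict omits the key entirely.
optim_config = {"lr": 1e-6}

# Old pattern: a second, code-level default that can drift from the YAML.
weight_decay = optim_config.get("weight_decay", 0.1)  # made-up stale fallback
print(weight_decay)  # 0.1 -- not the 0.01 the YAML documents

# New pattern: read the field directly so the YAML default is the single
# source of truth and a genuinely missing key fails loudly.
try:
    weight_decay = optim_config["weight_decay"]
except KeyError:
    print("weight_decay must come from the resolved YAML config")
```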
1 parent e882ab4 commit 88b5bc7

File tree: 3 files changed (+13 / -13 lines changed)


verl/trainer/config/actor/actor.yaml

Lines changed: 1 addition & 1 deletion

@@ -120,7 +120,7 @@ optim:
   # Learning rate
   lr: 1e-6
 
-  # Warmup steps ratio (used if lr_warmup_steps is negative)
+  # Warmup steps ratio (used if lr_warmup_steps is 0 or negative)
   lr_warmup_steps_ratio: 0.0
 
   # Total training steps (must be overridden at runtime)

verl/utils/megatron/optimizer.py

Lines changed: 5 additions & 5 deletions

@@ -23,11 +23,11 @@
 
 def init_megatron_optim_config(optim_config: dict) -> OptimizerConfig:
     optim_args = {
-        "optimizer": optim_config.get("optimizer", "adam"),
-        "lr": optim_config.get("lr"),
-        "min_lr": optim_config.get("min_lr", None),
-        "clip_grad": optim_config.get("clip_grad", 1.0),
-        "weight_decay": optim_config.get("weight_decay", 0.01),
+        "optimizer": optim_config.optimizer,
+        "lr": optim_config.lr,
+        "min_lr": optim_config.min_lr,
+        "clip_grad": optim_config.clip_grad,
+        "weight_decay": optim_config.weight_decay,
         "bf16": True,
         "params_dtype": torch.bfloat16,
         "use_distributed_optimizer": True,

verl/workers/engine/fsdp/transformer_impl.py

Lines changed: 7 additions & 7 deletions

@@ -375,13 +375,13 @@ def _build_lr_scheduler(self, optimizer):
 
         optim_config = self.optimizer_config
 
-        total_steps = optim_config.get("total_training_steps", 0)
-        num_warmup_steps = int(optim_config.get("lr_warmup_steps", -1))
-        warmup_style = optim_config.get("warmup_style", "constant")
-        min_lr_ratio = optim_config.get("min_lr_ratio", 0.0)
-        num_cycles = optim_config.get("num_cycles", 0.5)
-        if num_warmup_steps < 0:
-            num_warmup_steps_ratio = optim_config.get("lr_warmup_steps_ratio", 0.0)
+        total_steps = optim_config.total_training_steps
+        num_warmup_steps = optim_config.lr_warmup_steps
+        warmup_style = optim_config.warmup_style
+        min_lr_ratio = optim_config.min_lr_ratio
+        num_cycles = optim_config.num_cycles
+        if num_warmup_steps <= 0:
+            num_warmup_steps_ratio = optim_config.lr_warmup_steps_ratio
             num_warmup_steps = int(num_warmup_steps_ratio * total_steps)
 
         if self.rank == 0:
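To make the `< 0` to `<= 0` change concrete, here is a standalone sketch of the fallback logic with illustrative step counts (the helper name and numbers are hypothetical, not from verl):

```python
def resolve_warmup_steps(lr_warmup_steps: int, lr_warmup_steps_ratio: float, total_steps: int) -> int:
    # New behavior: zero as well as negative means "derive warm-up from the ratio".
    if lr_warmup_steps <= 0:
        return int(lr_warmup_steps_ratio * total_steps)
    return lr_warmup_steps


total_steps = 1000
assert resolve_warmup_steps(50, 0.05, total_steps) == 50  # explicit step count wins
assert resolve_warmup_steps(-1, 0.05, total_steps) == 50  # old and new behavior agree here
assert resolve_warmup_steps(0, 0.05, total_steps) == 50   # old `< 0` check would have kept 0 warm-up steps
```

With `lr_warmup_steps: 0`, the previous FSDP code ignored `lr_warmup_steps_ratio` entirely, which contradicted the YAML comment updated in this commit.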
