2 changes: 1 addition & 1 deletion .github/workflows/e2e_eval_aime24.yml

@@ -124,7 +124,7 @@ jobs:
       - name: Install the current repository
         run: |
           pip3 install --no-deps -e .[test,gpu,math]
-          pip3 install math-verify
+          pip3 install math-verify transformers==4.56.2
       - name: Prepare aime24 dataset
         run: |
           ray stop --force
5 changes: 5 additions & 0 deletions .github/workflows/e2e_ppo_trainer_megatron_vllm.yml

@@ -87,6 +87,7 @@ permissions:
 env:
   IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.5-transformers4.55.4-vllm0.10.0-mcore0.13.0-te2.2"
   DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
+  TRANSFORMERS_VERSION: "4.56.2"

 jobs:
   setup:
@@ -121,6 +122,7 @@ jobs:
       - name: Install the current repository
         run: |
           pip3 install --no-deps -e .[test]
+          pip3 install math-verify transformers==$TRANSFORMERS_VERSION
       - name: Prepare GSM8K dataset
         run: |
           python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
@@ -173,6 +175,7 @@ jobs:
       - name: Install the current repository
         run: |
           pip3 install --no-deps -e .[test]
+          pip3 install math-verify transformers==$TRANSFORMERS_VERSION
       - name: Prepare GSM8K dataset
         run: |
           python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
@@ -210,6 +213,7 @@ jobs:
       - name: Install the current repository
         run: |
           pip3 install --no-deps -e .[test]
+          pip3 install math-verify transformers==$TRANSFORMERS_VERSION
      - name: Prepare GSM8K dataset
         run: |
           python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
@@ -241,6 +245,7 @@ jobs:
       - name: Install the current repository
         run: |
           pip3 install --no-deps -e .[test]
+          pip3 install math-verify transformers==$TRANSFORMERS_VERSION
       - name: Prepare GSM8K dataset
         run: |
           python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
7 changes: 6 additions & 1 deletion .github/workflows/e2e_ppo_trainer_megatron_vllm_2.yml

@@ -87,6 +87,7 @@ permissions:
 env:
   IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.5-transformers4.55.4-vllm0.10.0-mcore0.13.0-te2.2"
   DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
+  TRANSFORMERS_VERSION: "4.56.2"

 jobs:
   setup:
@@ -121,6 +122,7 @@ jobs:
       - name: Install the current repository
         run: |
           pip3 install --no-deps -e .[test]
+          pip3 install transformers==$TRANSFORMERS_VERSION
       - name: Prepare GSM8K dataset
         run: |
           python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
@@ -154,6 +156,7 @@ jobs:
         run: |
           pip3 install --no-deps -e .[test]
           pip3 install mbridge
+          pip3 install transformers==$TRANSFORMERS_VERSION
       - name: Prepare GSM8K dataset
         run: |
           python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
@@ -186,6 +189,7 @@ jobs:
       - name: Install the current repository
         run: |
           pip3 install --no-deps -e .[test]
+          pip3 install transformers==$TRANSFORMERS_VERSION
       - name: Prepare Geo3k dataset
         run: |
           python3 examples/data_preprocess/geo3k.py --local_dataset_path ${HOME}/models/hf_data/hiyouga/geometry3k/
@@ -220,6 +224,7 @@ jobs:
       - name: Install the current repository
         run: |
           pip3 install --no-deps -e .[test,vllm]
+          pip3 install transformers==$TRANSFORMERS_VERSION
       - name: Prepare GSM8K dataset
         run: |
           ray stop --force
@@ -359,7 +364,7 @@ jobs:
       - name: Install the current repository
         run: |
           pip3 install --no-deps -e .[test,gpu,vllm,geo,trl]
-          pip install "transformers[hf_xet]==4.54.0"
+          pip3 install transformers==$TRANSFORMERS_VERSION
       # Geo3k
       - name: Prepare GEO3K dataset
         run: |
44 changes: 40 additions & 4 deletions .github/workflows/reward_model.yml

@@ -58,9 +58,32 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
   cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

+
+env:
+  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.5-transformers4.55.4-sglang0.4.10.post2-mcore0.13.0-te2.2"
+  DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
+  TRANSFORMERS_VERSION: "4.56.2"
+
+
 jobs:
+  setup:
+    if: github.repository_owner == 'volcengine'
+    runs-on: ubuntu-latest
+    outputs:
+      runner-label: ${{ steps.create-runner.outputs.runner-label }}
+      mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
+    steps:
+      - uses: actions/checkout@v4
+      - id: create-runner
+        uses: volcengine/vemlp-github-runner@v1
+        with:
+          mode: "create"
+          faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+          mlp-image: "${{ env.IMAGE }}"
+
   reward_model:
-    runs-on: [L20x8]
+    needs: setup
+    runs-on: [ "${{ needs.setup.outputs.runner-label || 'L20x8' }}" ]
     timeout-minutes: 20 # Increase this timeout value as needed
     env:
       HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
@@ -71,9 +94,6 @@ jobs:
       SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK: "True"
       NCCL_SHM_DISABLE: "1"
       NCCL_P2P_DISABLE: "1"
-    container:
-      image: verlai/verl:app-verl0.5-transformers4.55.4-sglang0.4.10.post2-mcore0.13.0-te2.2
-      options: --gpus all --shm-size=10g
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
         with:
@@ -93,3 +113,19 @@ jobs:
         run: |
           unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY
           pytest -s -x tests/workers/reward_model/test_generative_reward_model.py
+
+  cleanup:
+    runs-on: ubuntu-latest
+    needs:
+      [
+        setup,
+        reward_model
+      ]
+    if: always()
+    steps:
+      - id: destroy-runner
+        uses: volcengine/vemlp-github-runner@v1
+        with:
+          mode: "destroy"
+          faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+          mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
5 changes: 5 additions & 0 deletions recipe/dapo/dapo_ray_trainer.py

@@ -332,6 +332,11 @@ def fit(self):
                     actor_output_metrics = reduce_metrics(actor_output.meta_info["metrics"])
                     metrics.update(actor_output_metrics)

+                # Log rollout generations if enabled
+                rollout_data_dir = self.config.trainer.get("rollout_data_dir", None)
+                if rollout_data_dir:
+                    self._log_rollout_data(batch, reward_extra_infos_dict, timing_raw, rollout_data_dir)
+
                 # validate
                 if (
                     self.val_reward_fn is not None
17 changes: 1 addition & 16 deletions recipe/one_step_off_policy/ray_trainer.py

@@ -552,22 +552,7 @@ def fit(self):
                 # Log rollout generations if enabled
                 rollout_data_dir = self.config.trainer.get("rollout_data_dir", None)
                 if rollout_data_dir:
-                    with marked_timer("dump_rollout_generations", timing_raw, color="green"):
-                        inputs = self.tokenizer.batch_decode(batch.batch["prompts"], skip_special_tokens=True)
-                        outputs = self.tokenizer.batch_decode(batch.batch["responses"], skip_special_tokens=True)
-                        scores = batch.batch["token_level_scores"].sum(-1).cpu().tolist()
-                        sample_gts = [
-                            item.non_tensor_batch.get("reward_model", {}).get("ground_truth", None) for item in batch
-                        ]
-
-                        self._dump_generations(
-                            inputs=inputs,
-                            outputs=outputs,
-                            gts=sample_gts,
-                            scores=scores,
-                            reward_extra_infos_dict=reward_extra_infos_dict,
-                            dump_path=rollout_data_dir,
-                        )
+                    self._log_rollout_data(batch, reward_extra_infos_dict, timing_raw, rollout_data_dir)

                 # validate
                 if (
17 changes: 1 addition & 16 deletions recipe/sppo/sppo_ray_trainer.py

@@ -308,22 +308,7 @@ def fit(self):
                 # Log rollout generations if enabled
                 rollout_data_dir = self.config.trainer.get("rollout_data_dir", None)
                 if rollout_data_dir:
-                    with simple_timer("dump_rollout_generations", timing_raw):
-                        print(batch.batch.keys())
-                        inputs = self.tokenizer.batch_decode(batch.batch["prompts"], skip_special_tokens=True)
-                        outputs = self.tokenizer.batch_decode(batch.batch["responses"], skip_special_tokens=True)
-                        scores = batch.batch["token_level_scores"].sum(-1).cpu().tolist()
-                        sample_gts = [
-                            item.non_tensor_batch.get("reward_model", {}).get("ground_truth", None) for item in batch
-                        ]
-                        self._dump_generations(
-                            inputs=inputs,
-                            outputs=outputs,
-                            scores=scores,
-                            reward_extra_infos_dict=reward_extra_infos_dict,
-                            gts=sample_gts,
-                            dump_path=rollout_data_dir,
-                        )
+                    self._log_rollout_data(batch, reward_extra_infos_dict, timing_raw, rollout_data_dir)

                 # validate
                 if (
2 changes: 1 addition & 1 deletion verl/trainer/config/actor/actor.yaml

@@ -120,7 +120,7 @@ optim:
   # Learning rate
   lr: 1e-6

-  # Warmup steps ratio (used if lr_warmup_steps is negative)
+  # Warmup steps ratio (used if lr_warmup_steps is 0 or negative)
   lr_warmup_steps_ratio: 0.0

   # Total training steps (must be overridden at runtime)
56 changes: 33 additions & 23 deletions verl/trainer/ppo/ray_trainer.py

@@ -441,6 +441,38 @@ def _dump_generations(self, inputs, outputs, gts, scores, reward_extra_infos_dict

         print(f"Dumped generations to {filename}")

+    def _log_rollout_data(
+        self, batch: DataProto, reward_extra_infos_dict: dict, timing_raw: dict, rollout_data_dir: str
+    ):
+        """Log rollout data to disk.
+        Args:
+            batch (DataProto): The batch containing rollout data
+            reward_extra_infos_dict (dict): Additional reward information to log
+            timing_raw (dict): Timing information for profiling
+            rollout_data_dir (str): Directory path to save the rollout data
+        """
+        with marked_timer("dump_rollout_generations", timing_raw, color="green"):
+            inputs = self.tokenizer.batch_decode(batch.batch["prompts"], skip_special_tokens=True)
+            outputs = self.tokenizer.batch_decode(batch.batch["responses"], skip_special_tokens=True)
+            scores = batch.batch["token_level_scores"].sum(-1).cpu().tolist()
+            sample_gts = [item.non_tensor_batch.get("reward_model", {}).get("ground_truth", None) for item in batch]
+
+            # Work on a copy so the per-step request_id column never leaks into the caller's dict
+            reward_extra_infos_to_dump = reward_extra_infos_dict.copy()
+            if "request_id" in batch.non_tensor_batch:
+                reward_extra_infos_to_dump.setdefault(
+                    "request_id",
+                    batch.non_tensor_batch["request_id"].tolist(),
+                )
+
+            self._dump_generations(
+                inputs=inputs,
+                outputs=outputs,
+                gts=sample_gts,
+                scores=scores,
+                reward_extra_infos_dict=reward_extra_infos_to_dump,
+                dump_path=rollout_data_dir,
+            )
+
     def _maybe_log_val_generations(self, inputs, outputs, scores):
         """Log a table of validation samples to the configured logger (wandb or swanlab)"""

@@ -1111,29 +1143,7 @@ def fit(self):
                     # Log rollout generations if enabled
                     rollout_data_dir = self.config.trainer.get("rollout_data_dir", None)
                     if rollout_data_dir:
-                        with marked_timer("dump_rollout_generations", timing_raw, color="green"):
-                            inputs = self.tokenizer.batch_decode(batch.batch["prompts"], skip_special_tokens=True)
-                            outputs = self.tokenizer.batch_decode(batch.batch["responses"], skip_special_tokens=True)
-                            scores = batch.batch["token_level_scores"].sum(-1).cpu().tolist()
-                            sample_gts = [
-                                item.non_tensor_batch.get("reward_model", {}).get("ground_truth", None)
-                                for item in batch
-                            ]
-
-                            if "request_id" in batch.non_tensor_batch:
-                                reward_extra_infos_dict.setdefault(
-                                    "request_id",
-                                    batch.non_tensor_batch["request_id"].tolist(),
-                                )
-
-                            self._dump_generations(
-                                inputs=inputs,
-                                outputs=outputs,
-                                gts=sample_gts,
-                                scores=scores,
-                                reward_extra_infos_dict=reward_extra_infos_dict,
-                                dump_path=rollout_data_dir,
-                            )
+                        self._log_rollout_data(batch, reward_extra_infos_dict, timing_raw, rollout_data_dir)

                     # validate
                     if (
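The refactored helper above delegates the actual file writing to _dump_generations, which this diff does not show. As a rough mental model only — the JSONL layout, field names, and per-step filename below are assumptions for illustration, not verl's actual schema — an equivalent standalone dump routine could look like:

import json
import os


def dump_generations_jsonl(inputs, outputs, gts, scores, reward_extra_infos_dict, dump_path, step):
    """Hypothetical sketch: write one JSON object per sample to <dump_path>/<step>.jsonl."""
    os.makedirs(dump_path, exist_ok=True)
    filename = os.path.join(dump_path, f"{step}.jsonl")
    n = len(inputs)
    with open(filename, "w") as f:
        for i in range(n):
            row = {"input": inputs[i], "output": outputs[i], "gts": gts[i], "score": scores[i]}
            # Attach per-sample extras (e.g. request_id) only when lengths line up.
            for key, values in reward_extra_infos_dict.items():
                if len(values) == n:
                    row[key] = values[i]
            f.write(json.dumps(row) + "\n")
    print(f"Dumped generations to {filename}")

Note that the new method copies reward_extra_infos_dict before attaching request_id, so repeated calls across training steps do not accumulate state in the caller's dict.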
10 changes: 5 additions & 5 deletions verl/utils/megatron/optimizer.py

@@ -23,11 +23,11 @@

 def init_megatron_optim_config(optim_config: dict) -> OptimizerConfig:
     optim_args = {
-        "optimizer": optim_config.get("optimizer", "adam"),
-        "lr": optim_config.get("lr"),
-        "min_lr": optim_config.get("min_lr", None),
-        "clip_grad": optim_config.get("clip_grad", 1.0),
-        "weight_decay": optim_config.get("weight_decay", 0.01),
+        "optimizer": optim_config.optimizer,
+        "lr": optim_config.lr,
+        "min_lr": optim_config.min_lr,
+        "clip_grad": optim_config.clip_grad,
+        "weight_decay": optim_config.weight_decay,
         "bf16": True,
         "params_dtype": torch.bfloat16,
         "use_distributed_optimizer": True,
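Replacing optim_config.get("key", default) with attribute access implies the optimizer config is now a typed object whose defaults are declared once in the config schema rather than repeated at each call site, so a misspelled field fails loudly instead of silently returning a fallback. A minimal sketch of the pattern — MinimalOptimConfig is a hypothetical stand-in, not verl's actual config class:

from dataclasses import dataclass
from typing import Optional


@dataclass
class MinimalOptimConfig:
    """Hypothetical typed optimizer config with schema-level defaults."""

    lr: float
    optimizer: str = "adam"
    min_lr: Optional[float] = None
    clip_grad: float = 1.0
    weight_decay: float = 0.01


cfg = MinimalOptimConfig(lr=1e-6)
# Attribute access raises AttributeError on a typo; dict.get() would have
# silently substituted the default for a misspelled key.
print(cfg.optimizer, cfg.lr, cfg.clip_grad, cfg.weight_decay)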
14 changes: 7 additions & 7 deletions verl/workers/engine/fsdp/transformer_impl.py

@@ -375,13 +375,13 @@ def _build_lr_scheduler(self, optimizer):

         optim_config = self.optimizer_config

-        total_steps = optim_config.get("total_training_steps", 0)
-        num_warmup_steps = int(optim_config.get("lr_warmup_steps", -1))
-        warmup_style = optim_config.get("warmup_style", "constant")
-        min_lr_ratio = optim_config.get("min_lr_ratio", 0.0)
-        num_cycles = optim_config.get("num_cycles", 0.5)
-        if num_warmup_steps < 0:
-            num_warmup_steps_ratio = optim_config.get("lr_warmup_steps_ratio", 0.0)
+        total_steps = optim_config.total_training_steps
+        num_warmup_steps = optim_config.lr_warmup_steps
+        warmup_style = optim_config.warmup_style
+        min_lr_ratio = optim_config.min_lr_ratio
+        num_cycles = optim_config.num_cycles
+        if num_warmup_steps <= 0:
+            num_warmup_steps_ratio = optim_config.lr_warmup_steps_ratio
             num_warmup_steps = int(num_warmup_steps_ratio * total_steps)

         if self.rank == 0:
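Together with the actor.yaml comment fix above, the new <= 0 guard means lr_warmup_steps_ratio now applies when lr_warmup_steps is exactly 0, not only when it is negative. A standalone sketch of the fallback, mirroring the lines above:

def resolve_warmup_steps(lr_warmup_steps: int, lr_warmup_steps_ratio: float, total_steps: int) -> int:
    """Return the effective warmup step count, falling back to the ratio."""
    num_warmup_steps = lr_warmup_steps
    if num_warmup_steps <= 0:  # previously `< 0`, so an explicit 0 bypassed the ratio
        num_warmup_steps = int(lr_warmup_steps_ratio * total_steps)
    return num_warmup_steps


print(resolve_warmup_steps(0, 0.1, 1000))   # 100: the ratio now applies at 0
print(resolve_warmup_steps(50, 0.1, 1000))  # 50: an explicit positive value wins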