Skip to content

Commit 7f1c759

Browse files
committed
unify the filename and training methods of the two scripts
1 parent 00ff157 commit 7f1c759

File tree

2 files changed

+17
-17
lines changed

2 files changed

+17
-17
lines changed

examples/grpo_trainer/run_qwen3_32b_npu.sh renamed to examples/grpo_trainer/run_qwen3-32b_npu.sh

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,24 @@
11
set -x
22

3+
project_name='GRPO-Qwen3'
4+
exp_name='GRPO-Qwen3-32b-npu'
5+
gen_tp=4
6+
RAY_DATA_HOME=${RAY_DATA_HOME:-"${HOME}/verl"}
7+
MODEL_PATH=${MODEL_PATH:-"${RAY_DATA_HOME}/models/Qwen3-32B"}
8+
TRAIN_FILE=${TRAIN_FILE:-"${RAY_DATA_HOME}/data/gsm8k/train.parquet"}
9+
TEST_FILE=${TEST_FILE:-"${RAY_DATA_HOME}/data/gsm8k/test.parquet"}
10+
311
python3 -m verl.trainer.main_ppo \
412
algorithm.adv_estimator=grpo \
5-
data.train_files=$HOME/data/gsm8k/train.parquet \
6-
data.val_files=$HOME/data/gsm8k/test.parquet \
13+
data.train_files="${TRAIN_FILE}" \
14+
data.val_files="${TEST_FILE}" \
715
data.train_batch_size=1024 \
816
data.max_prompt_length=2048 \
917
data.max_response_length=2048 \
1018
data.filter_overlong_prompts=True \
1119
data.truncation='error' \
1220
data.shuffle=False \
13-
actor_rollout_ref.model.path=Qwen/Qwen3-32B \
21+
actor_rollout_ref.model.path=${MODEL_PATH} \
1422
actor_rollout_ref.actor.optim.lr=1e-6 \
1523
actor_rollout_ref.model.use_remove_padding=True \
1624
actor_rollout_ref.actor.ulysses_sequence_parallel_size=4 \
@@ -27,7 +35,7 @@ python3 -m verl.trainer.main_ppo \
2735
actor_rollout_ref.actor.fsdp_config.param_offload=True \
2836
actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
2937
actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=8 \
30-
actor_rollout_ref.rollout.tensor_model_parallel_size=4 \
38+
actor_rollout_ref.rollout.tensor_model_parallel_size=${gen_tp} \
3139
actor_rollout_ref.rollout.name=vllm \
3240
actor_rollout_ref.rollout.gpu_memory_utilization=0.7 \
3341
actor_rollout_ref.rollout.n=4 \
@@ -40,8 +48,8 @@ python3 -m verl.trainer.main_ppo \
4048
algorithm.use_kl_in_reward=False \
4149
trainer.critic_warmup=0 \
4250
trainer.logger=['console','tensorboard'] \
43-
trainer.project_name='verl_grpo_example_gsm8k_fsdp' \
44-
trainer.experiment_name='qwen3_32b_fsdp' \
51+
trainer.project_name="${project_name}" \
52+
trainer.experiment_name="${exp_name}" \
4553
trainer.n_gpus_per_node=16 \
4654
trainer.nnodes=2 \
4755
trainer.resume_from_path=checkpoints/ \

examples/grpo_trainer/run_qwen3-8b_npu.sh

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,15 @@
1-
# Tested successfully on the hiyouga/verl:ngc-th2.6.0-cu126-vllm0.8.4-flashinfer0.2.2-cxx11abi0 image.
2-
# It outperforms the Qwen2 7B base model by two percentage points on the test set of GSM8K.
31
set -x
42

5-
project_name='GRPO'
3+
project_name='GRPO-Qwen3'
64
exp_name='GRPO-Qwen3-8B-npu'
75
gen_tp=2
8-
WORKING_DIR=${WORKING_DIR:-"./"}
9-
RUNTIME_ENV=${RUNTIME_ENV:-"${WORKING_DIR}/verl/trainer/runtime_env.yaml"}
10-
RAY_ADDRESS=${RAY_ADDRESS:-"http://localhost:8265"}
116
RAY_DATA_HOME=${RAY_DATA_HOME:-"${HOME}/verl"}
127
MODEL_PATH=${MODEL_PATH:-"${RAY_DATA_HOME}/models/Qwen3-8B"}
138
CKPTS_DIR=${CKPTS_DIR:-"${RAY_DATA_HOME}/ckpts/${project_name}/${exp_name}"}
149
TRAIN_FILE=${TRAIN_FILE:-"${RAY_DATA_HOME}/data/dapo-math-17k.parquet"}
1510
TEST_FILE=${TEST_FILE:-"${RAY_DATA_HOME}/data/aime-2024.parquet"}
1611

17-
ray job submit --no-wait --runtime-env="${RUNTIME_ENV}" \
18-
--working-dir "${WORKING_DIR}" \
19-
--address "${RAY_ADDRESS}" \
20-
-- python3 -m verl.trainer.main_ppo \
12+
python3 -m verl.trainer.main_ppo \
2113
algorithm.adv_estimator=grpo \
2214
data.train_files="${TRAIN_FILE}" \
2315
data.val_files="${TEST_FILE}" \
@@ -26,7 +18,7 @@ ray job submit --no-wait --runtime-env="${RUNTIME_ENV}" \
2618
data.max_response_length=1024 \
2719
data.filter_overlong_prompts=True \
2820
data.truncation='error' \
29-
actor_rollout_ref.model.path=${MODEL_PATH}} \
21+
actor_rollout_ref.model.path=${MODEL_PATH} \
3022
actor_rollout_ref.actor.optim.lr=1e-6 \
3123
actor_rollout_ref.model.use_remove_padding=True \
3224
actor_rollout_ref.actor.ppo_mini_batch_size=64 \

0 commit comments

Comments
 (0)