11 set -x
22
3+ project_name='GRPO-Qwen3'
4+ exp_name='GRPO-Qwen3-32b-npu'
5+ gen_tp=4
6+ RAY_DATA_HOME=${RAY_DATA_HOME:-"${HOME}/verl"}
7+ MODEL_PATH=${MODEL_PATH:-"${RAY_DATA_HOME}/models/Qwen3-32B"}
8+ TRAIN_FILE=${TRAIN_FILE:-"${RAY_DATA_HOME}/data/gsm8k/train.parquet"}
9+ TEST_FILE=${TEST_FILE:-"${RAY_DATA_HOME}/data/gsm8k/test.parquet"}
10+
311 python3 -m verl.trainer.main_ppo \
412 algorithm.adv_estimator=grpo \
5- data.train_files=$HOME/data/gsm8k/train.parquet \
6- data.val_files=$HOME/data/gsm8k/test.parquet \
13+ data.train_files="${TRAIN_FILE}" \
14+ data.val_files="${TEST_FILE}" \
715 data.train_batch_size=1024 \
816 data.max_prompt_length=2048 \
917 data.max_response_length=2048 \
1018 data.filter_overlong_prompts=True \
1119 data.truncation='error' \
1220 data.shuffle=False \
13- actor_rollout_ref.model.path=Qwen/Qwen3-32B \
21+ actor_rollout_ref.model.path=${MODEL_PATH} \
1422 actor_rollout_ref.actor.optim.lr=1e-6 \
1523 actor_rollout_ref.model.use_remove_padding=True \
1624 actor_rollout_ref.actor.ulysses_sequence_parallel_size=4 \
@@ -27,7 +35,7 @@ python3 -m verl.trainer.main_ppo \
2735 actor_rollout_ref.actor.fsdp_config.param_offload=True \
2836 actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
2937 actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=8 \
30- actor_rollout_ref.rollout.tensor_model_parallel_size=4 \
38+ actor_rollout_ref.rollout.tensor_model_parallel_size=${gen_tp} \
3139 actor_rollout_ref.rollout.name=vllm \
3240 actor_rollout_ref.rollout.gpu_memory_utilization=0.7 \
3341 actor_rollout_ref.rollout.n=4 \
@@ -40,8 +48,8 @@ python3 -m verl.trainer.main_ppo \
4048 algorithm.use_kl_in_reward=False \
4149 trainer.critic_warmup=0 \
4250 trainer.logger=['console','tensorboard'] \
43- trainer.project_name='verl_grpo_example_gsm8k_fsdp' \
44- trainer.experiment_name='qwen3_32b_fsdp' \
51+ trainer.project_name="${project_name}" \
52+ trainer.experiment_name="${exp_name}" \
4553 trainer.n_gpus_per_node=16 \
4654 trainer.nnodes=2 \
4755 trainer.resume_from_path=checkpoints/ \
0 commit comments