Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions examples/baselines/qwen2_5_vl_3b_clevr.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ set -x

MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct # replace it with your local file path

-SYSTEM_PROMPT="""A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant
+FORMAT_PROMPT="""A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant
first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning
process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e.,
<think> reasoning process here </think><answer> answer here </answer>"""
Expand All @@ -11,10 +11,9 @@ python3 -m verl.trainer.main \
config=examples/config.yaml \
data.train_files=BUAADreamer/clevr_count_70k@train \
data.val_files=BUAADreamer/clevr_count_70k@test \
-data.system_prompt="${SYSTEM_PROMPT}" \
+data.format_prompt="${FORMAT_PROMPT}" \
worker.actor.model.model_path=${MODEL_PATH} \
worker.rollout.tensor_parallel_size=1 \
-worker.rollout.enable_chunked_prefill=false \
-worker.reward.compute_score=r1v \
+worker.reward.score_function=r1v \
trainer.experiment_name=qwen2_5_vl_3b_clevr \
trainer.n_gpus_per_node=2
7 changes: 3 additions & 4 deletions examples/baselines/qwen2_5_vl_3b_geoqa8k.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ set -x

MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct # replace it with your local file path

-SYSTEM_PROMPT="""A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant
+FORMAT_PROMPT="""A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant
first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning
process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e.,
<think> reasoning process here </think><answer> answer here </answer>"""
Expand All @@ -11,10 +11,9 @@ python3 -m verl.trainer.main \
config=examples/config.yaml \
data.train_files=leonardPKU/GEOQA_8K_R1V@train \
data.val_files=leonardPKU/GEOQA_8K_R1V@test \
-data.system_prompt="${SYSTEM_PROMPT}" \
+data.format_prompt="${FORMAT_PROMPT}" \
worker.actor.model.model_path=${MODEL_PATH} \
worker.rollout.tensor_parallel_size=1 \
-worker.rollout.enable_chunked_prefill=false \
-worker.reward.compute_score=r1v \
+worker.reward.score_function=r1v \
trainer.experiment_name=qwen2_5_vl_3b_geoqa8k \
trainer.n_gpus_per_node=8
5 changes: 3 additions & 2 deletions examples/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ worker:

reward:
reward_type: function
-compute_score: math
+score_function: math
+skip_special_tokens: true

trainer:
total_episodes: 15
Expand All @@ -80,7 +81,7 @@ trainer:
val_freq: 5 # -1 to disable
val_before_train: true
val_only: false
-val_generations_to_log: 1
+val_generations_to_log: 3
save_freq: 5 # -1 to disable
save_limit: 3 # -1 to disable
save_checkpoint_path: null
Expand Down
4 changes: 2 additions & 2 deletions examples/qwen2_5_7b_math_grpo.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@ set -x

MODEL_PATH=Qwen/Qwen2.5-7B-Instruct # replace it with your local file path

-SYSTEM_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
+FORMAT_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}."""

python3 -m verl.trainer.main \
config=examples/config.yaml \
data.train_files=hiyouga/math12k@train \
data.val_files=hiyouga/math12k@test \
-data.system_prompt="${SYSTEM_PROMPT}" \
+data.format_prompt="${FORMAT_PROMPT}" \
worker.actor.model.model_path=${MODEL_PATH} \
trainer.experiment_name=qwen2_5_7b_math_grpo \
trainer.n_gpus_per_node=8
5 changes: 2 additions & 3 deletions examples/qwen2_5_vl_32b_geo3k_grpo.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,19 @@ set -x

MODEL_PATH=Qwen/Qwen2.5-VL-32B-Instruct # replace it with your local file path

-SYSTEM_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
+FORMAT_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}."""

python3 -m verl.trainer.main \
config=examples/config.yaml \
data.train_files=hiyouga/geometry3k@train \
data.val_files=hiyouga/geometry3k@test \
-data.system_prompt="${SYSTEM_PROMPT}" \
+data.format_prompt="${FORMAT_PROMPT}" \
worker.actor.model.model_path=${MODEL_PATH} \
worker.actor.micro_batch_size_per_device_for_update=1 \
worker.actor.micro_batch_size_per_device_for_experience=8 \
worker.actor.fsdp.torch_dtype=bf16 \
worker.actor.optim.strategy=adamw_bf16 \
worker.rollout.tensor_parallel_size=8 \
-worker.rollout.enable_chunked_prefill=false \
trainer.experiment_name=qwen2_5_vl_32b_geo_grpo \
trainer.n_gpus_per_node=8
5 changes: 2 additions & 3 deletions examples/qwen2_5_vl_3b_geo3k_grpo.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,15 @@ set -x

MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct # replace it with your local file path

-SYSTEM_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
+FORMAT_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}."""

python3 -m verl.trainer.main \
config=examples/config.yaml \
data.train_files=hiyouga/geometry3k@train \
data.val_files=hiyouga/geometry3k@test \
-data.system_prompt="${SYSTEM_PROMPT}" \
+data.format_prompt="${FORMAT_PROMPT}" \
worker.actor.model.model_path=${MODEL_PATH} \
worker.rollout.tensor_parallel_size=1 \
-worker.rollout.enable_chunked_prefill=false \
trainer.experiment_name=qwen2_5_vl_3b_geo_grpo \
trainer.n_gpus_per_node=2
5 changes: 2 additions & 3 deletions examples/qwen2_5_vl_7b_geo3k_grpo.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,14 @@ set -x

MODEL_PATH=Qwen/Qwen2.5-VL-7B-Instruct # replace it with your local file path

-SYSTEM_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
+FORMAT_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}."""

python3 -m verl.trainer.main \
config=examples/config.yaml \
data.train_files=hiyouga/geometry3k@train \
data.val_files=hiyouga/geometry3k@test \
-data.system_prompt="${SYSTEM_PROMPT}" \
+data.format_prompt="${FORMAT_PROMPT}" \
worker.actor.model.model_path=${MODEL_PATH} \
-worker.rollout.enable_chunked_prefill=false \
trainer.experiment_name=qwen2_5_vl_7b_geo_grpo \
trainer.n_gpus_per_node=8
5 changes: 2 additions & 3 deletions examples/qwen2_5_vl_7b_geo3k_reinforce.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,15 @@ set -x

MODEL_PATH=Qwen/Qwen2.5-VL-7B-Instruct # replace it with your local file path

-SYSTEM_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
+FORMAT_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}."""

python3 -m verl.trainer.main \
config=examples/config.yaml \
data.train_files=hiyouga/geometry3k@train \
data.val_files=hiyouga/geometry3k@test \
-data.system_prompt="${SYSTEM_PROMPT}" \
+data.format_prompt="${FORMAT_PROMPT}" \
worker.actor.model.model_path=${MODEL_PATH} \
-worker.rollout.enable_chunked_prefill=false \
algorithm.adv_estimator=reinforce_plus_plus \
trainer.experiment_name=qwen2_5_vl_7b_geo_reinforce_pp \
trainer.n_gpus_per_node=8
5 changes: 2 additions & 3 deletions examples/qwen2_5_vl_7b_geo3k_swanlab.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,15 @@ set -x

MODEL_PATH=Qwen/Qwen2.5-VL-7B-Instruct # replace it with your local file path

-SYSTEM_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
+FORMAT_PROMPT="""You FIRST think about the reasoning process as an internal monologue and then provide the final answer.
The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}."""

python3 -m verl.trainer.main \
config=examples/config.yaml \
data.train_files=hiyouga/geometry3k@train \
data.val_files=hiyouga/geometry3k@test \
-data.system_prompt="${SYSTEM_PROMPT}" \
+data.format_prompt="${FORMAT_PROMPT}" \
worker.actor.model.model_path=${MODEL_PATH} \
-worker.rollout.enable_chunked_prefill=false \
trainer.experiment_name=qwen2_5_vl_7b_geo_grpo \
trainer.logger=['console','swanlab'] \
trainer.n_gpus_per_node=8
Loading