hiyouga · hiyouga · Mar 10, 2025 · Mar 9, 2025 · Mar 9, 2025 · Mar 9, 2025
diff --git a/README.md b/README.md
@@ -102,7 +102,9 @@ Please refer to the example datasets to prepare your own dataset.
 
 ## Other Baselines
 
+We also implemented the following two baselines from the [R1-V](https://github.com/deep-agent/R1-V) project:
 - [CLEVR-70k-Counting](examples/run_qwen2_5_vl_3b_clevr.sh): Train the Qwen2.5-VL-3B-Instruct model on counting problem.
+- [GeoQA-8k](examples/run_qwen2_5_vl_3b_geoqa8k.sh): Train the Qwen2.5-VL-3B-Instruct model on the GeoQA problem.
 
 ## TODO
 

diff --git a/examples/run_qwen2_5_vl_3b_geoqa8k.sh b/examples/run_qwen2_5_vl_3b_geoqa8k.sh
@@ -0,0 +1,29 @@
+# Launch verl.trainer.main to train Qwen2.5-VL-3B-Instruct on the GEOQA_8K_R1V dataset.
+# Each key=value argument below is passed to the trainer together with config=examples/grpo_example.yaml.
+
+set -x  # echo every command before it runs
+
+# Environment variables read by vLLM: use the xFormers attention backend and turn the V1 engine off.
+export VLLM_ATTENTION_BACKEND=XFORMERS
+export VLLM_USE_V1=0
+
+MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct  # replace it with your local file path
+
+# Text handed to the trainer as data.system_prompt. The shell has no triple-quote syntax, so a
+# single pair of double quotes is used; the line breaks and leading spaces are part of the value.
+SYSTEM_PROMPT="A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant
+ first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning
+ process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e.,
+ <think> reasoning process here </think><answer> answer here </answer>"
+
+python3 -m verl.trainer.main \
+    config=examples/grpo_example.yaml \
+    data.train_files=leonardPKU/GEOQA_8K_R1V@train \
+    data.val_files=leonardPKU/GEOQA_8K_R1V@test \
+    data.system_prompt="${SYSTEM_PROMPT}" \
+    worker.actor.model.model_path="${MODEL_PATH}" \
+    worker.rollout.tensor_parallel_size=1 \
+    worker.rollout.enable_chunked_prefill=false \
+    worker.reward.compute_score=r1v \
+    trainer.experiment_name=qwen2_5_vl_3b_geoqa8k \
+    trainer.n_gpus_per_node=8