@@ -8,31 +8,31 @@ Welcome to contribute new baselines!
 
 ## Algorithm Baselines
 
-### [Qwen2.5-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct)
+### [Qwen2.5-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct) on [Math12k](https://huggingface.co/datasets/hiyouga/math12k)
 
-| Size | Algorithm | Bits | Dataset | LR   | KL   | Test Score |
-| ---- | --------- | ---- | ------- | ---- | ---- | ---------- |
-| 7B   | GRPO      | AMP  | Math12k | 1e-6 | 1e-2 | 0.73->0.79 |
+| Size | Algorithm | Bits | LR   | KL   | Test Score |
+| ---- | --------- | ---- | ---- | ---- | ---------- |
+| 7B   | GRPO      | AMP  | 1e-6 | 1e-2 | 0.73->0.79 |
 
-### [Qwen2.5-VL-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct)
+### [Qwen2.5-VL-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct) on [Geometry3k](https://huggingface.co/datasets/hiyouga/geometry3k)
 
-| Size | Algorithm | Bits | Dataset | LR   | KL   | Test Score |
-| ---- | --------- | ---- | ------- | ---- | ---- | ---------- |
-| 7B   | GRPO      | AMP  | Geo3k   | 1e-6 | 1e-2 | 0.39->0.52 |
-| 7B   | GRPO      | BF16 | Geo3k   | 1e-6 | 1e-2 | 0.39->0.52 |
-| 7B   | GRPO      | AMP  | Geo3k   | 1e-6 | 1e-3 | 0.39->0.52 |
-| 7B   | RLOO      | AMP  | Geo3k   | 1e-6 | 1e-2 | 0.39->0.53 |
-| 3B   | GRPO      | AMP  | Geo3k   | 1e-6 | 1e-2 | 0.27->0.44 |
-| 32B  | GRPO      | BF16 | Geo3k   | 1e-6 | 1e-2 | 0.46->0.61 |
+| Size | Algorithm | Bits | LR   | KL   | Test Score |
+| ---- | --------- | ---- | ---- | ---- | ---------- |
+| 7B   | GRPO      | AMP  | 1e-6 | 1e-2 | 0.39->0.52 |
+| 7B   | GRPO      | BF16 | 1e-6 | 1e-2 | 0.39->0.52 |
+| 7B   | GRPO      | AMP  | 1e-6 | 1e-3 | 0.39->0.52 |
+| 7B   | RLOO      | AMP  | 1e-6 | 1e-2 | 0.39->0.53 |
+| 3B   | GRPO      | AMP  | 1e-6 | 1e-2 | 0.27->0.44 |
+| 32B  | GRPO      | BF16 | 1e-6 | 1e-2 | 0.46->0.61 |
 
 > [!NOTE]
 > The hyper-parameters not listed are all the same as the default values.
 
 ## Performance Baselines
 
-### [Qwen2.5-VL-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct)
+### [Qwen2.5-VL-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct) on [Geometry3k](https://huggingface.co/datasets/hiyouga/geometry3k)
 
-| Size | GPU Type      | Bits | Batch Size | vLLM util | vLLM TP | Peak Mem | Peak VRAM | Throughput | Sec per step | Actor MFU |
+| Size | GPU Type      | Bits | Batch Size | vLLM Util | vLLM TP | Peak Mem | Peak VRAM | Throughput | Sec per step | Actor MFU |
 | ---- | ------------- | ---- | ---------- | --------- | ------- | -------- | --------- | ---------- | ------------ | --------- |
 | 3B   | 8 * H100 80GB | AMP  | 4 / 16     | 0.6       | 2       | 120GB    | 35GB      | 1200       | 180s         | 6.3%      |
 | 7B   | 8 * H100 80GB | AMP  | 4 / 16     | 0.6       | 2       | 140GB    | 60GB      | 1200       | 180s         | 13.6%     |
@@ -41,8 +41,8 @@ Welcome to contribute new baselines!
 | 7B   | 8 * H100 80GB | BF16 | 4 / 16     | 0.6       | 2       | 150GB    | 50GB      | 1280       | 190s         | 13.9%     |
 | 32B  | 8 * H100 80GB | BF16 | 1 / 8      | 0.6       | 8       | 240GB    | 68GB      | 360        | 860s         | 11.2%     |
 
-- Batch size: micro_batch_size_per_device_for_update / micro_batch_size_per_device_for_experience
-- vLLM util: rollout.gpu_memory_utilization
+- Batch Size: micro_batch_size_per_device_for_update / micro_batch_size_per_device_for_experience
+- vLLM Util: rollout.gpu_memory_utilization
 - vLLM TP: rollout.tensor_parallel_size
 - Peak Mem: Peak CPU memory usage
 - Peak VRAM: Peak GPU memory usage
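
For readers mapping the legend above back to a launch configuration, a minimal sketch of how these keys might appear in a config file. Only the key names and the 7B/AMP row's values come from this document; the nesting (e.g. an `actor` section alongside `rollout`) is an assumption and may not match the repository's actual config layout:

```yaml
# Hypothetical config sketch: key names are taken from the legend above;
# the section nesting and any keys not listed there are assumptions.
actor:
  micro_batch_size_per_device_for_update: 4       # "Batch Size", left value
  micro_batch_size_per_device_for_experience: 16  # "Batch Size", right value
rollout:
  gpu_memory_utilization: 0.6  # "vLLM Util": fraction of VRAM reserved for vLLM
  tensor_parallel_size: 2      # "vLLM TP": GPUs sharded across one vLLM engine
```

Per the note in the Algorithm Baselines section, any hyper-parameter not shown in the tables is left at its default value.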