Skip to content

Commit a9e232e

Browse files
committed
Update e2e_ascend CI config
update
1 parent 96e7071 commit a9e232e

File tree

7 files changed

+39
-80
lines changed

7 files changed

+39
-80
lines changed

.github/workflows/e2e_ascend.yml

Lines changed: 22 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -43,22 +43,17 @@ on:
4343
branches:
4444
- main
4545
paths:
46+
- ".github/workflows/e2e_ascend.yml"
4647
- "**/*.py"
48+
- "docs/ascend_tutorial/**"
49+
- "examples/**"
50+
- "recipe/**"
51+
- "tests/special_npu/**"
52+
- "tests/special_sanity/**"
53+
- "verl/**"
54+
- "pyproject.toml"
4755
- "requirements-npu.txt"
48-
# Other entrypoints
49-
- "!examples/**"
50-
- "!tests/**"
51-
- "!verl/trainer/main_*.py"
52-
- "!verl/trainer/fsdp_sft_trainer.py"
53-
# Recipes
54-
- "!recipe/**"
55-
# Entrypoints
56-
- ".github/workflows/e2e_ascend.yml"
57-
- "examples/data_preprocess/gsm8k.py"
58-
- "examples/data_preprocess/geo3k.py"
59-
- "tests/special_e2e/ppo_trainer"
60-
- "verl/trainer/main_ppo.py"
61-
- "verl/trainer/config/ppo_trainer.yaml"
56+
- "setup.py"
6257

6358
# Cancel jobs on the same ref if a new one is triggered
6459
concurrency:
@@ -143,23 +138,20 @@ jobs:
143138
ray stop --force
144139
bash tests/special_npu/run_qwen2_5_05b_dapo.sh
145140
rm -rf $HOME/ckpts
146-
- name: Running gsm8k e2e qwen3 training tests with GRPO on ASCEND NPU
147-
run: |
148-
ray stop --force
149-
bash tests/special_npu/run_qwen3_06b_grpo.sh
150-
rm -rf $HOME/ckpts
151-
- name: Running gsm8k e2e qwen3 training tests with PPO on ASCEND NPU
152-
run: |
153-
ray stop --force
154-
bash tests/special_npu/run_qwen3_06b_ppo.sh
155-
rm -rf $HOME/ckpts
156-
- name: Running gsm8k e2e training tests with GRPO MindSpeed on ASCEND NPU
157-
run: |
158-
ray stop --force
159-
USE_DIST_CKPT=True bash tests/special_npu/run_qwen2_5_05b_grpo_mindspeed.sh
160-
rm -rf $HOME/dist_ckpt/qwen2_5_05b_grpo_mindspeed
161-
rm -rf $HOME/ckpts
162141
- name: Running NPU profiling unit tests
163142
run: |
164143
ray stop --force
165144
pytest -s -x tests/utils/test_special_mstx_profile.py
145+
# TODO Reopen this case after CI image update with libnuma.so available
146+
# - name: Running gsm8k e2e training tests with GRPO MindSpeed on ASCEND NPU
147+
# run: |
148+
# ray stop --force
149+
# USE_DIST_CKPT=True bash tests/special_npu/run_qwen2_5_05b_grpo_mindspeed.sh
150+
# rm -rf $HOME/dist_ckpt/qwen2_5_05b_grpo_mindspeed
151+
# rm -rf $HOME/ckpts
152+
# TODO Reopen this case after solving the error
153+
# - name: Running gsm8k e2e qwen3 training tests with PPO on ASCEND NPU
154+
# run: |
155+
# ray stop --force
156+
# bash tests/special_npu/run_qwen3_06b_ppo.sh
157+
# rm -rf $HOME/ckpts

tests/special_npu/run_qwen2_5_05b_dapo.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env bash
22
set -xeuo pipefail
33

4-
NUM_GPUS=${NUM_GPUS:-8}
4+
NUM_GPUS=${NUM_GPUS:-16}
55

66
MODEL_ID=${MODEL_ID:-Qwen/Qwen2.5-0.5B-Instruct}
77
MODEL_PATH=${MODEL_PATH:-${HOME}/models/${MODEL_ID}}

tests/special_npu/run_qwen2_5_05b_grpo.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ python3 -m verl.trainer.main_ppo \
3636
trainer.logger=console \
3737
trainer.project_name='verl_grpo_example_gsm8k' \
3838
trainer.experiment_name='qwen2_7b_function_rm' \
39-
trainer.n_gpus_per_node=8 \
39+
trainer.n_gpus_per_node=16 \
4040
trainer.nnodes=1 \
4141
trainer.save_freq=-1 \
4242
trainer.test_freq=5 \

tests/special_npu/run_qwen2_5_05b_grpo_mindspeed.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ python3 -m verl.trainer.main_ppo --config-path=config \
5959
trainer.logger=console \
6060
trainer.project_name='verl_grpo_example_gsm8k' \
6161
trainer.experiment_name='qwen2_7b_function_rm' \
62-
trainer.n_gpus_per_node=8 \
62+
trainer.n_gpus_per_node=16 \
6363
trainer.nnodes=1 \
6464
trainer.save_freq=-1 \
6565
trainer.test_freq=5 \

tests/special_npu/run_qwen2_5_vl_3b_npu.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ python3 -m verl.trainer.main_ppo \
1818
actor_rollout_ref.model.path=Qwen/Qwen2.5-VL-3B-Instruct \
1919
actor_rollout_ref.actor.optim.lr=1e-6 \
2020
actor_rollout_ref.model.use_remove_padding=True \
21-
actor_rollout_ref.actor.ppo_mini_batch_size=16 \
21+
actor_rollout_ref.actor.ppo_mini_batch_size=32 \
2222
actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=2 \
2323
actor_rollout_ref.actor.use_kl_loss=True \
2424
actor_rollout_ref.actor.kl_loss_coef=0.01 \
@@ -44,7 +44,7 @@ python3 -m verl.trainer.main_ppo \
4444
trainer.logger=console \
4545
trainer.project_name='verl_grpo_example_geo3k' \
4646
trainer.experiment_name='qwen2_5_vl_3b_function_rm' \
47-
trainer.n_gpus_per_node=8 \
47+
trainer.n_gpus_per_node=16 \
4848
trainer.nnodes=1 \
4949
trainer.save_freq=-1 \
5050
trainer.test_freq=-1 \

tests/special_npu/run_qwen3_06b_grpo.sh

Lines changed: 0 additions & 44 deletions
This file was deleted.

verl/models/transformers/qwen2_vl.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
)
2929
from transformers.utils import is_flash_attn_2_available, is_flash_attn_greater_or_equal_2_10
3030

31+
from verl.utils.device import is_npu_available
3132
from verl.utils.transformers_compat import is_transformers_version_in_range
3233
from verl.utils.ulysses import (
3334
gather_heads_scatter_seq,
@@ -46,9 +47,19 @@
4647

4748
_flash_supports_window_size = "window_size" in inspect.signature(flash_attn_func).parameters
4849
_flash_supports_deterministic = "deterministic" in inspect.signature(flash_attn_func).parameters
49-
_flash_deterministic_enabled = os.getenv("FLASH_ATTENTION_DETERMINISTIC", "0") == "1"
5050
_flash_use_top_left_mask = not is_flash_attn_greater_or_equal_2_10()
5151

52+
if is_npu_available:
53+
from transformers.integrations.npu_flash_attention import npu_flash_attn_func as flash_attn_func
54+
from transformers.integrations.npu_flash_attention import npu_flash_attn_varlen_func as flash_attn_varlen_func
55+
from transformers.modeling_flash_attention_utils import flash_attn_supports_top_left_mask
56+
57+
_flash_supports_window_size = "window_size" in inspect.signature(flash_attn_func).parameters
58+
_flash_supports_deterministic = "deterministic" in inspect.signature(flash_attn_func).parameters
59+
_flash_use_top_left_mask = flash_attn_supports_top_left_mask()
60+
61+
_flash_deterministic_enabled = os.getenv("FLASH_ATTENTION_DETERMINISTIC", "0") == "1"
62+
5263

5364
def get_rope_index(
5465
processor,

0 commit comments

Comments
 (0)