set -x

-# 0. download the config
-# only need to download the `configuration_deepseek.py`, `config.json`, `tokenizer_config.json`, `tokenizer.json` and `generation_config.json`
-# remove the `quantization_config` in the `config.json`
-# set `num_nextn_predict_layers=0` to disable MTP, which is not currently supported
+# 0. download the HF checkpoint
+# remove the `quantization_config` in the `config.json`
+# set `num_nextn_predict_layers=0` to disable MTP, which is not currently supported
+# huggingface-cli download deepseek-ai/DeepSeek-V3-0324

-huggingface-cli download deepseek-ai/DeepSeek-V3-0324 configuration_deepseek.py config.json
-
-# 1. download the model in dist_ckpt format from https://huggingface.co/BearBiscuit05/dpsk-v3-671B-BF16-dist_ckpt/tree/main
-# change HF_MODEL_PATH and DIST_CKPT_PATH to your own paths
-DIST_CKPT_PATH="<path_to_dist_ckpt>"
+# no offline dist checkpoint is needed any more: with mbridge>=0.13.0 we can init the model directly from the FP8 weights downloaded from Hugging Face
+# tested on docker://verlai/verl:app-verl0.5-vllm0.10.0-mcore0.13.0-te2.2
LLM="<path_to_dsv3_config>"


# 2. run the script
-gsm8k_train_path=/data/gsm8k/train.parquet
-gsm8k_test_path=/data/gsm8k/test.parquet
+gsm8k_train_path=/root/data/gsm8k/train.parquet
+gsm8k_test_path=/root/data/gsm8k/test.parquet
train_files=$gsm8k_train_path
test_files=$gsm8k_test_path

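# a minimal sketch of the `config.json` edits described in step 0 above; the two
# JSON keys come from those comments, while the python heredoc and the path taken
# from $LLM are only illustrative:
#   python3 - "$LLM/config.json" <<'EOF'
#   import json, sys
#   path = sys.argv[1]
#   with open(path) as f:
#       cfg = json.load(f)
#   cfg.pop("quantization_config", None)  # drop the FP8 quantization block
#   cfg["num_nextn_predict_layers"] = 0   # disable MTP, which is not yet supported
#   with open(path, "w") as f:
#       json.dump(cfg, f, indent=2)
#   EOF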
@@ -33,30 +30,32 @@ CRITIC_GRAD_OFFLOAD=${CRITIC_GRAD_OFFLOAD:-$COMMON_GRAD_OFFLOAD}
CRITIC_OPTIMIZER_OFFLOAD=${CRITIC_OPTIMIZER_OFFLOAD:-$COMMON_OPTIMIZER_OFFLOAD}
RM_PARAM_OFFLOAD=${RM_PARAM_OFFLOAD:-$COMMON_PARAM_OFFLOAD}

-# 512 H20 (96GB)
-NODES=64
+# 256 H100 (80GB)
+NODES=32
PP=16
TP=1
-EP=32
+EP=16
ETP=1
INFER_TP=32
# if short of memory, consider increasing TP/ETP and enabling full recompute

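# arithmetic behind the layout above, assuming 8 GPUs per node (256 H100 / 32 nodes):
# PP=16 x TP=1 means one training replica spans 16 GPUs, so DP = 256/16 = 16;
# EP=16 shards DeepSeek-V3's 256 routed experts at 256/16 = 16 experts per GPU;
# INFER_TP=32 yields 256/32 = 8 rollout engine replicas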
# full recompute
-# +actor_rollout_ref.actor.megatron.override_transformer_config.recompute_method=uniform \
-# +actor_rollout_ref.actor.megatron.override_transformer_config.recompute_granularity=full \
-# +actor_rollout_ref.actor.megatron.override_transformer_config.recompute_num_layers=1 \

n_resp_per_prompt=4
+max_prompt_length=2048
+max_response_length=4096
+use_dynamic_bsz=True
+actor_ppo_max_token_len=$(( (max_prompt_length + max_response_length) * 1 ))
+infer_ppo_max_token_len=$(( (max_prompt_length + max_response_length) * 3 ))

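# token budgets implied by the settings above: dynamic batching packs sequences up to
# (2048 + 4096) * 1 = 6144 tokens per GPU for actor updates, and up to
# (2048 + 4096) * 3 = 18432 tokens per GPU for the cheaper log-prob forward passes,
# so even a max-length sample always fits in one micro-batch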
# RAY_ADDRESS='auto' ray job submit --working-dir . --
python3 -m verl.trainer.main_ppo --config-path=./config --config-name='ppo_megatron_trainer' \
    algorithm.adv_estimator=grpo \
    data.train_files="$train_files" \
    data.val_files="$test_files" \
    data.train_batch_size=512 \
-    data.max_prompt_length=2048 \
-    data.max_response_length=4096 \
+    data.max_prompt_length=$max_prompt_length \
+    data.max_response_length=$max_response_length \
    data.filter_overlong_prompts=True \
    data.truncation='error' \
    actor_rollout_ref.model.path=$LLM \
@@ -81,8 +80,15 @@ python3 -m verl.trainer.main_ppo --config-path=./config --config-name='ppo_megat
    trainer.nnodes=$NODES \
    trainer.save_freq=-1 \
    trainer.test_freq=5 \
-    +actor_rollout_ref.actor.megatron.override_transformer_config.num_layers_in_first_pipeline_stage=3 \
-    +actor_rollout_ref.actor.megatron.override_transformer_config.num_layers_in_last_pipeline_stage=2 \
+    actor_rollout_ref.model.use_fused_kernels=True \
+    actor_rollout_ref.actor.use_dynamic_bsz=${use_dynamic_bsz} \
+    actor_rollout_ref.ref.log_prob_use_dynamic_bsz=${use_dynamic_bsz} \
+    actor_rollout_ref.rollout.log_prob_use_dynamic_bsz=${use_dynamic_bsz} \
+    actor_rollout_ref.actor.ppo_max_token_len_per_gpu=${actor_ppo_max_token_len} \
+    actor_rollout_ref.ref.log_prob_max_token_len_per_gpu=${infer_ppo_max_token_len} \
+    actor_rollout_ref.rollout.log_prob_max_token_len_per_gpu=${infer_ppo_max_token_len} \
+    +actor_rollout_ref.actor.megatron.override_transformer_config.num_layers_in_first_pipeline_stage=4 \
+    +actor_rollout_ref.actor.megatron.override_transformer_config.num_layers_in_last_pipeline_stage=1 \
    actor_rollout_ref.actor.megatron.pipeline_model_parallel_size=$PP \
    actor_rollout_ref.ref.megatron.pipeline_model_parallel_size=$PP \
    actor_rollout_ref.actor.megatron.tensor_model_parallel_size=$TP \
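# the num_layers_in_{first,last}_pipeline_stage overrides added above exist because
# DeepSeek-V3's 61 layers do not divide evenly across PP=16: the first stage (which
# also holds the embedding) takes 4 layers, the last (which holds the LM head) takes 1,
# and the 14 middle stages take 4 each, so 4 + 14*4 + 1 = 61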
@@ -95,10 +101,10 @@ python3 -m verl.trainer.main_ppo --config-path=./config --config-name='ppo_megat
    actor_rollout_ref.actor.megatron.optimizer_offload=${ACTOR_OPTIMIZER_OFFLOAD} \
    actor_rollout_ref.actor.megatron.grad_offload=${ACTOR_GRAD_OFFLOAD} \
    actor_rollout_ref.ref.megatron.param_offload=${REF_PARAM_OFFLOAD} \
-    actor_rollout_ref.actor.megatron.use_dist_checkpointing=True \
-    actor_rollout_ref.ref.megatron.use_dist_checkpointing=True \
-    actor_rollout_ref.actor.megatron.dist_checkpointing_path=$DIST_CKPT_PATH \
-    actor_rollout_ref.ref.megatron.dist_checkpointing_path=$DIST_CKPT_PATH \
+    +actor_rollout_ref.actor.megatron.override_transformer_config.recompute_method=uniform \
+    +actor_rollout_ref.actor.megatron.override_transformer_config.recompute_granularity=full \
+    +actor_rollout_ref.actor.megatron.override_transformer_config.recompute_num_layers=1 \
+    actor_rollout_ref.actor.megatron.use_mbridge=True \
    trainer.default_local_dir=$CKPT_DIR \
    trainer.val_before_train=False \
    trainer.total_epochs=100 "$@"
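# usage sketch (the script name here is illustrative): trailing arguments are
# forwarded to Hydra via "$@", and the offload switches can be flipped from the
# environment since they follow the ${NAME:-$COMMON_...} default pattern above, e.g.
#   ACTOR_OPTIMIZER_OFFLOAD=True bash run_dsv3_grpo.sh trainer.total_epochs=1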