Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions tests/special_e2e/sft/test_sft_engine_all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,16 @@ BACKEND=fsdp SP_SIZE=2 FSDP_SIZE=-1 NUM_GPUS=8 FSDP_STRATEGY=fsdp bash tests/spe
echo "run with sp4 fsdp_size4 num_gpus8 fsdp_strategy fsdp"
BACKEND=fsdp SP_SIZE=4 FSDP_SIZE=4 NUM_GPUS=8 FSDP_STRATEGY=fsdp bash tests/special_e2e/sft/run_sft_engine_gsm8k.sh

# test with fsdp 2
echo "run with sp1 fsdp_size1 num_gpus1 fsdp_strategy fsdp2"
BACKEND=fsdp SP_SIZE=1 FSDP_SIZE=1 NUM_GPUS=1 FSDP_STRATEGY=fsdp2 bash tests/special_e2e/sft/run_sft_engine_gsm8k.sh

# TODO: toggle the following tests when the grad norm of fsdp is fixed
# BACKEND=fsdp SP_SIZE=1 FSDP_SIZE=1 NUM_GPUS=1 FSDP_STRATEGY=fsdp2 bash tests/special_e2e/sft/run_sft_engine_gsm8k.sh
# BACKEND=fsdp SP_SIZE=1 FSDP_SIZE=2 NUM_GPUS=8 FSDP_STRATEGY=fsdp2 bash tests/special_e2e/sft/run_sft_engine_gsm8k.sh
# echo "run with sp1 fsdp_size-1 num_gpus8 fsdp_strategy fsdp2"
# BACKEND=fsdp SP_SIZE=1 FSDP_SIZE=-1 NUM_GPUS=8 FSDP_STRATEGY=fsdp2 bash tests/special_e2e/sft/run_sft_engine_gsm8k.sh
# echo "run with sp2 fsdp_size-1 num_gpus8 fsdp_strategy fsdp2"
# BACKEND=fsdp SP_SIZE=2 FSDP_SIZE=-1 NUM_GPUS=8 FSDP_STRATEGY=fsdp2 bash tests/special_e2e/sft/run_sft_engine_gsm8k.sh
# BACKEND=fsdp SP_SIZE=1 FSDP_SIZE=2 NUM_GPUS=8 FSDP_STRATEGY=fsdp2 bash tests/special_e2e/sft/run_sft_engine_gsm8k.sh
# BACKEND=fsdp SP_SIZE=4 FSDP_SIZE=4 NUM_GPUS=8 FSDP_STRATEGY=fsdp2 bash tests/special_e2e/sft/run_sft_engine_gsm8k.sh

# test with megatron
Expand Down
3 changes: 2 additions & 1 deletion verl/utils/seqlen_balancing.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,8 @@ def prepare_dynamic_batch(data: DataProto, max_token_len: int) -> tuple[list[Dat
for i, batch_idx in enumerate(batch_idx_list):
tensors = dict(batch[i])
non_tensors = {key: value[batch_idx] for key, value in data.non_tensor_batch.items()}
micro_batches.append(DataProto.from_dict(tensors, non_tensors, meta_info=data.meta_info))
meta_info = copy.deepcopy(data.meta_info)
micro_batches.append(DataProto.from_dict(tensors, non_tensors, meta_info=meta_info))

return micro_batches, batch_idx_list

Expand Down
Loading
Loading