We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 318055b, commit 10c59af. Copy full SHA for 10c59af
2 files changed
.github/workflows/integration_test_8gpu_features.yaml
@@ -76,5 +76,10 @@ jobs:
76
export TEST_WITH_ROCM=$([[ "${{ matrix.gpu-arch-type }}" == "rocm" ]] && echo 1 || echo 0)
77
python -m tests.integration_tests.run_tests --test_suite features $RUNNER_TEMP/artifacts-to-be-uploaded --ngpu 8
78
79
+ # Verify the accuracy: loss_compare.py is given a baseline command and a test command (data_parallel_replicate_degree=2). The commands contain spaces, so they are passed quoted.
80
+ export baseline_cmd='CONFIG_FILE="./torchtitan/models/llama3/train_configs/debug_model.toml" ./run_train.sh'
81
+ export test_cmd='CONFIG_FILE="./torchtitan/models/llama3/train_configs/debug_model.toml" ./run_train.sh --parallelism.data_parallel_replicate_degree=2'
82
+ python3 scripts/loss_compare.py . . --baseline-cmd="${baseline_cmd}" --test-cmd="${test_cmd}" --no-seed-checkpoint --steps=10
83
+
84
rm -rf $RUNNER_TEMP/artifacts-to-be-uploaded/*/checkpoint
85
rm -rf artifacts-to-be-uploaded/*/checkpoint
0 commit comments