Commit b26bf7f

cuichenx authored and chtruong814 committed
Add cp per token loss check (#14282)
* add cp per token loss check

Signed-off-by: Chen Cui <[email protected]>

* Apply isort and black reformatting

Signed-off-by: cuichenx <[email protected]>

---------

Signed-off-by: Chen Cui <[email protected]>
Signed-off-by: cuichenx <[email protected]>
Co-authored-by: cuichenx <[email protected]>
Signed-off-by: Charlie Truong <[email protected]>
1 parent b807e7a commit b26bf7f

File tree

1 file changed (+5 -0 lines)


nemo/collections/llm/api.py

Lines changed: 5 additions & 0 deletions
@@ -35,6 +35,7 @@
     EvaluationTarget,
     MisconfigurationError,
 )
+from nemo.collections.llm.gpt.data.fine_tuning import FineTuningDataModule
 from nemo.collections.llm.modelopt import (
     DistillationGPTModel,
     ExportConfig,
@@ -1359,6 +1360,10 @@ def _validate_config(
             assert (
                 model.config.seq_length % (trainer.strategy.context_parallel_size * 2) == 0
             ), 'Sequence length must be divisible by 2 * context parallel size if context parallel is used.'
+        if isinstance(data, FineTuningDataModule):
+            assert model.config.calculate_per_token_loss, (
+                "When finetuning with CP>1, " "model.config.calculate_per_token_loss must be True"
+            )
 
     # EP validation
     if trainer.strategy.expert_model_parallel_size > 1:
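
For context, below is a minimal standalone sketch of the logic this diff enforces. It is illustrative rather than NeMo's actual implementation: ModelConfig and Strategy are hypothetical stand-ins for model.config and trainer.strategy, and the rationale in the comments (per-token loss keeps masked fine-tuning tokens weighted correctly across context-parallel ranks) is the usual motivation for such a check, not something the commit itself states.

# Illustrative sketch of the CP validation added in this commit.
# ModelConfig and Strategy are hypothetical stand-ins for model.config
# and trainer.strategy; this is not NeMo's actual code.
from dataclasses import dataclass


@dataclass
class ModelConfig:
    seq_length: int = 4096
    calculate_per_token_loss: bool = False


@dataclass
class Strategy:
    context_parallel_size: int = 2


def validate_cp(config: ModelConfig, strategy: Strategy, is_finetuning: bool) -> None:
    """Mirror the CP checks from _validate_config in nemo/collections/llm/api.py."""
    if strategy.context_parallel_size > 1:
        # Pre-existing check: CP splits each sequence into 2 * CP chunks for
        # load balancing, so the sequence length must divide evenly.
        assert config.seq_length % (strategy.context_parallel_size * 2) == 0, (
            "Sequence length must be divisible by 2 * context parallel size "
            "if context parallel is used."
        )
        # New check: when fine-tuning (masked loss) with CP>1, require
        # per-token loss, likely because CP ranks hold different numbers of
        # loss-contributing tokens, so a per-microbatch average would weight
        # ranks unevenly.
        if is_finetuning:
            assert config.calculate_per_token_loss, (
                "When finetuning with CP>1, model.config.calculate_per_token_loss must be True"
            )


# Passes: 4096 % (2 * 2) == 0 and per-token loss is enabled.
validate_cp(ModelConfig(calculate_per_token_loss=True), Strategy(), is_finetuning=True)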
