Skip to content

Commit 4835627

Browse files
JRD971000 and ko3n1g
authored and committed
add nmh HF conversion (NVIDIA-NeMo#13941)
* add nmh HF conversion
* Apply isort and black reformatting

Signed-off-by: JRD971000 <[email protected]>

* add license header
* restore sft nmh test
* Apply isort and black reformatting

Signed-off-by: ko3n1g <[email protected]>

---------

Signed-off-by: JRD971000 <[email protected]>
Signed-off-by: ko3n1g <[email protected]>
Co-authored-by: JRD971000 <[email protected]>
Co-authored-by: ko3n1g <[email protected]>
1 parent 1d221c1 commit 4835627

File tree

6 files changed

+91
-17
lines changed

6 files changed

+91
-17
lines changed

.github/workflows/cicd-main-nemo2.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,7 @@ jobs:
6868
- script: L2_NeMo_2_SSM_Pretraining
6969
runner: self-hosted-azure
7070
- script: L2_NeMo_2_SSM_Finetuning
71-
runner: self-hosted-azure
72-
is-optional: true
71+
runner: self-hosted-azure-gpus-2-h100
7372
- script: L2_NeMo_2_HF_MODEL_IMPORT
7473
runner: self-hosted-azure
7574
- script: L2_NeMo_2_jit_callback
@@ -239,6 +238,8 @@ jobs:
239238
runner: self-hosted-azure
240239
- script: L2_NeMo_2_Conversion_Test_Nemotron
241240
runner: self-hosted-azure
241+
- script: L2_NeMo_2_Conversion_Test_Nemotron_H_4B
242+
runner: self-hosted-azure
242243
- script: L2_NeMo_2_Conversion_Test_Phi3Mini
243244
runner: self-hosted-azure
244245
- script: L2_NeMo_2_Conversion_Test_Qwen2

nemo/collections/llm/gpt/model/ssm.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -687,14 +687,21 @@ def config(self):
687687
source: SSMConfig = io.load_context(str(self), subpath="model.config")
688688

689689
# TODO @ataghibakhsh: Change AutoConfig to NemotronHConfig once merged to HF
690+
691+
# Check for local model path from environment variable first
692+
local_model_path = os.environ.get('HF_LOCAL_MODEL_PATH')
690693
if type(source) == NemotronHConfig4B:
691-
hf_config = AutoConfig.from_pretrained("nvidia/Nemotron-H-4B-Base-8K", trust_remote_code=True)
694+
model_path = local_model_path if local_model_path else "nvidia/Nemotron-H-4B-Base-8K"
695+
hf_config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
692696
elif type(source) == NemotronHConfig8B:
693-
hf_config = AutoConfig.from_pretrained("nvidia/Nemotron-H-8B-Base-8K", trust_remote_code=True)
697+
model_path = local_model_path if local_model_path else "nvidia/Nemotron-H-8B-Base-8K"
698+
hf_config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
694699
elif type(source) == NemotronHConfig47B:
695-
hf_config = AutoConfig.from_pretrained("nvidia/Nemotron-H-47B-Base-8K", trust_remote_code=True)
700+
model_path = local_model_path if local_model_path else "nvidia/Nemotron-H-47B-Base-8K"
701+
hf_config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
696702
elif type(source) == NemotronHConfig56B:
697-
hf_config = AutoConfig.from_pretrained("nvidia/Nemotron-H-56B-Base-8K", trust_remote_code=True)
703+
model_path = local_model_path if local_model_path else "nvidia/Nemotron-H-56B-Base-8K"
704+
hf_config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
698705
else:
699706
raise ValueError(f"Unsupported model size: {source}")
700707

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Copyright (c) 2020-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Checkpoint-conversion test for Nemotron-H between NeMo 2 and Hugging Face formats.

Driven by CI (see the L2_NeMo_2_Conversion_Test_Nemotron_H_4B functional test),
this script converts a checkpoint in one direction per invocation:
NEMO2_TO_HF exports a NeMo 2 checkpoint to HF, HF_TO_NEMO2 imports an HF
checkpoint into NeMo 2.
"""

import argparse

from nemo.collections import llm
from nemo.collections.llm.gpt.model.ssm import HFNemotronHExporter, HFNemotronHImporter


def get_args():
    """Parse command-line arguments for the conversion run.

    Returns:
        argparse.Namespace with:
            conversion_type: direction of conversion, "NEMO2_TO_HF" or "HF_TO_NEMO2".
            source_ckpt: path to the checkpoint to convert from.
            target_ckpt: path to write the converted checkpoint to.
    """
    parser = argparse.ArgumentParser(description="Convert Nemotron-H checkpoints between NeMo 2 and HF formats.")
    parser.add_argument("--conversion_type", type=str, required=True)
    parser.add_argument("--source_ckpt", type=str, required=True)
    parser.add_argument("--target_ckpt", type=str, required=True)
    return parser.parse_args()


if __name__ == "__main__":
    args = get_args()
    # This test currently targets the 4B variant only.
    nmh_config = llm.NemotronHConfig4B()

    if args.conversion_type == "NEMO2_TO_HF":
        exporter = HFNemotronHExporter(args.source_ckpt, model_config=nmh_config)
        exporter.apply(args.target_ckpt)
    elif args.conversion_type == "HF_TO_NEMO2":
        # Fix: the original bound the importer to a variable named `exporter`,
        # which misleads readers about the conversion direction.
        importer = HFNemotronHImporter(args.source_ckpt)
        importer.apply(args.target_ckpt)
    else:
        raise ValueError(f"Invalid conversion type: {args.conversion_type}")
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Copyright (c) 2020-2025, NVIDIA CORPORATION.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
export HF_LOCAL_MODEL_PATH=/home/TestData/llm/models/nemotronh-4B
16+
NEMO_MODEL_TYPE=MambaModel
17+
NEMO_MODEL_CONFIG=NemotronHConfig4B
18+
NEMO_OUTPUT_PATH=/tmp/output_nemo2_ckpt
19+
HF_OUTPUT_PATH=/tmp/output_hf_ckpt
20+
21+
coverage run -a --data-file=/workspace/.coverage --source=/workspace/nemo tests/collections/llm/conversion/test_nmh_conversion.py --conversion_type=HF_TO_NEMO2 --source_ckpt=${HF_LOCAL_MODEL_PATH} --target_ckpt=${NEMO_OUTPUT_PATH}
22+
coverage run -a --data-file=/workspace/.coverage --source=/workspace/nemo tests/collections/llm/conversion/test_nmh_conversion.py --conversion_type=NEMO2_TO_HF --source_ckpt=${NEMO_OUTPUT_PATH} --target_ckpt=${HF_OUTPUT_PATH}

tests/functional_tests/L2_NeMo_2_SSM_Finetuning.sh

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,22 +14,22 @@
1414
coverage run -a --data-file=/workspace/.coverage --source=/workspace/nemo tests/collections/llm/gpt/model/test_nemotronh.py \
1515
--num-nodes=1 \
1616
--devices=2 \
17-
--max-steps=10 \
18-
--val-check-interval=10 \
17+
--max-steps=20 \
18+
--val-check-interval=20 \
1919
--experiment-dir=/tmp/nlp_megatron_mamba_nemo-ux-mamba_cicd_test_sft/$RUN_ID \
20-
--ckpt-dir="/mnt/datadrive/TestData/nlp/megatron_mamba/toy_nm5" \
20+
--ckpt-dir="/home/TestData/nlp/megatron_mamba/toy_nmh" \
2121
--vocab-file="/home/TestData/nlp/megatron_mamba/nm5_tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json" \
22-
--sft \
23-
--restore-optimizer-from-ckpt \
22+
--dataset-dir="/home/TestData/nlp/megatron_mamba/toy_ssm_dataset/legal_pile_text_document" \
2423
--seq-length=512 \
25-
--hybrid-override-pattern="M-M*" \
26-
--num-layers=4 \
24+
--restore-optimizer-from-ckpt \
25+
--hybrid-override-pattern="M-*" \
26+
--num-layers=3 \
2727
--tensor-parallel-size=2 \
2828
--pipeline-model-parallel-size=1 \
2929
--context-parallel-size=1 \
3030
--global-batch-size=8 \
3131
--micro-batch-size=1 \
32-
--model-size="8B" \
32+
--model-size="4B" \
3333
--clip-grad 1 \
3434
--lr=0.0003 \
3535
--warmup-steps=0 \

tests/functional_tests/L2_NeMo_2_SSM_Pretraining.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,14 @@ coverage run -a --data-file=/workspace/.coverage --source=/workspace/nemo tests/
2020
--vocab-file="/home/TestData/nlp/megatron_mamba/nm5_tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json" \
2121
--dataset-dir="/home/TestData/nlp/megatron_mamba/toy_ssm_dataset/legal_pile_text_document" \
2222
--seq-length=512 \
23-
--hybrid-override-pattern="M-M*" \
24-
--num-layers=4 \
23+
--hybrid-override-pattern="M-*" \
24+
--num-layers=3 \
2525
--tensor-parallel-size=1 \
2626
--pipeline-model-parallel-size=1 \
2727
--context-parallel-size=1 \
2828
--global-batch-size=8 \
2929
--micro-batch-size=1 \
30-
--model-size="8B" \
30+
--model-size="4B" \
3131
--clip-grad 1 \
3232
--lr=0.0003 \
3333
--warmup-steps=0 \

0 commit comments

Comments
 (0)