Skip to content

Commit 4835627

Browse files
JRD971000 and ko3n1g
authored and committed
add nmh HF conversion (NVIDIA-NeMo#13941)
* add nmh HF conversion
* Apply isort and black reformatting

Signed-off-by: JRD971000 <[email protected]>

* add license header
* restore sft nmh test
* Apply isort and black reformatting

Signed-off-by: ko3n1g <[email protected]>

---------

Signed-off-by: JRD971000 <[email protected]>
Signed-off-by: ko3n1g <[email protected]>
Co-authored-by: JRD971000 <[email protected]>
Co-authored-by: ko3n1g <[email protected]>
1 parent 1d221c1 commit 4835627

File tree

6 files changed

+91
-17
lines changed

6 files changed

+91
-17
lines changed

.github/workflows/cicd-main-nemo2.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,7 @@ jobs:
6868
- script: L2_NeMo_2_SSM_Pretraining
6969
runner: self-hosted-azure
7070
- script: L2_NeMo_2_SSM_Finetuning
71-
runner: self-hosted-azure
72-
is-optional: true
71+
runner: self-hosted-azure-gpus-2-h100
7372
- script: L2_NeMo_2_HF_MODEL_IMPORT
7473
runner: self-hosted-azure
7574
- script: L2_NeMo_2_jit_callback
@@ -239,6 +238,8 @@ jobs:
239238
runner: self-hosted-azure
240239
- script: L2_NeMo_2_Conversion_Test_Nemotron
241240
runner: self-hosted-azure
241+
- script: L2_NeMo_2_Conversion_Test_Nemotron_H_4B
242+
runner: self-hosted-azure
242243
- script: L2_NeMo_2_Conversion_Test_Phi3Mini
243244
runner: self-hosted-azure
244245
- script: L2_NeMo_2_Conversion_Test_Qwen2

nemo/collections/llm/gpt/model/ssm.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -687,14 +687,21 @@ def config(self):
687687
source: SSMConfig = io.load_context(str(self), subpath="model.config")
688688

689689
# TODO @ataghibakhsh: Change AutoConfig to NemotronHConfig once merged to HF
690+
691+
# Check for local model path from environment variable first
692+
local_model_path = os.environ.get('HF_LOCAL_MODEL_PATH')
690693
if type(source) == NemotronHConfig4B:
691-
hf_config = AutoConfig.from_pretrained("nvidia/Nemotron-H-4B-Base-8K", trust_remote_code=True)
694+
model_path = local_model_path if local_model_path else "nvidia/Nemotron-H-4B-Base-8K"
695+
hf_config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
692696
elif type(source) == NemotronHConfig8B:
693-
hf_config = AutoConfig.from_pretrained("nvidia/Nemotron-H-8B-Base-8K", trust_remote_code=True)
697+
model_path = local_model_path if local_model_path else "nvidia/Nemotron-H-8B-Base-8K"
698+
hf_config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
694699
elif type(source) == NemotronHConfig47B:
695-
hf_config = AutoConfig.from_pretrained("nvidia/Nemotron-H-47B-Base-8K", trust_remote_code=True)
700+
model_path = local_model_path if local_model_path else "nvidia/Nemotron-H-47B-Base-8K"
701+
hf_config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
696702
elif type(source) == NemotronHConfig56B:
697-
hf_config = AutoConfig.from_pretrained("nvidia/Nemotron-H-56B-Base-8K", trust_remote_code=True)
703+
model_path = local_model_path if local_model_path else "nvidia/Nemotron-H-56B-Base-8K"
704+
hf_config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
698705
else:
699706
raise ValueError(f"Unsupported model size: {source}")
700707

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Copyright (c) 2020-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Checkpoint-conversion test for Nemotron-H between NeMo 2 and Hugging Face formats.

Driven by CI (see the L2_NeMo_2_Conversion_Test_Nemotron_H_4B functional test),
this script converts a checkpoint in one direction per invocation:
NEMO2_TO_HF exports a NeMo 2 checkpoint to HF, HF_TO_NEMO2 imports an HF
checkpoint into NeMo 2.
"""

import argparse

from nemo.collections import llm
from nemo.collections.llm.gpt.model.ssm import HFNemotronHExporter, HFNemotronHImporter


def get_args():
    """Parse command-line arguments for the conversion run.

    Returns:
        argparse.Namespace with:
            conversion_type: direction of conversion, "NEMO2_TO_HF" or "HF_TO_NEMO2".
            source_ckpt: path to the checkpoint to convert from.
            target_ckpt: path to write the converted checkpoint to.
    """
    parser = argparse.ArgumentParser(description="Convert Nemotron-H checkpoints between NeMo 2 and HF formats.")
    parser.add_argument("--conversion_type", type=str, required=True)
    parser.add_argument("--source_ckpt", type=str, required=True)
    parser.add_argument("--target_ckpt", type=str, required=True)
    return parser.parse_args()


if __name__ == "__main__":
    args = get_args()
    # This test currently targets the 4B variant only.
    nmh_config = llm.NemotronHConfig4B()

    if args.conversion_type == "NEMO2_TO_HF":
        exporter = HFNemotronHExporter(args.source_ckpt, model_config=nmh_config)
        exporter.apply(args.target_ckpt)
    elif args.conversion_type == "HF_TO_NEMO2":
        # Fix: the original bound the importer to a variable named `exporter`,
        # which misleads readers about the conversion direction.
        importer = HFNemotronHImporter(args.source_ckpt)
        importer.apply(args.target_ckpt)
    else:
        raise ValueError(f"Invalid conversion type: {args.conversion_type}")
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Copyright (c) 2020-2025, NVIDIA CORPORATION.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
export HF_LOCAL_MODEL_PATH=/home/TestData/llm/models/nemotronh-4B
16+
NEMO_MODEL_TYPE=MambaModel
17+
NEMO_MODEL_CONFIG=NemotronHConfig4B
18+
NEMO_OUTPUT_PATH=/tmp/output_nemo2_ckpt
19+
HF_OUTPUT_PATH=/tmp/output_hf_ckpt
20+
21+
coverage run -a --data-file=/workspace/.coverage --source=/workspace/nemo tests/collections/llm/conversion/test_nmh_conversion.py --conversion_type=HF_TO_NEMO2 --source_ckpt=${HF_LOCAL_MODEL_PATH} --target_ckpt=${NEMO_OUTPUT_PATH}
22+
coverage run -a --data-file=/workspace/.coverage --source=/workspace/nemo tests/collections/llm/conversion/test_nmh_conversion.py --conversion_type=NEMO2_TO_HF --source_ckpt=${NEMO_OUTPUT_PATH} --target_ckpt=${HF_OUTPUT_PATH}

tests/functional_tests/L2_NeMo_2_SSM_Finetuning.sh

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,22 +14,22 @@
1414
coverage run -a --data-file=/workspace/.coverage --source=/workspace/nemo tests/collections/llm/gpt/model/test_nemotronh.py \
1515
--num-nodes=1 \
1616
--devices=2 \
17-
--max-steps=10 \
18-
--val-check-interval=10 \
17+
--max-steps=20 \
18+
--val-check-interval=20 \
1919
--experiment-dir=/tmp/nlp_megatron_mamba_nemo-ux-mamba_cicd_test_sft/$RUN_ID \
20-
--ckpt-dir="/mnt/datadrive/TestData/nlp/megatron_mamba/toy_nm5" \
20+
--ckpt-dir="/home/TestData/nlp/megatron_mamba/toy_nmh" \
2121
--vocab-file="/home/TestData/nlp/megatron_mamba/nm5_tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json" \
22-
--sft \
23-
--restore-optimizer-from-ckpt \
22+
--dataset-dir="/home/TestData/nlp/megatron_mamba/toy_ssm_dataset/legal_pile_text_document" \
2423
--seq-length=512 \
25-
--hybrid-override-pattern="M-M*" \
26-
--num-layers=4 \
24+
--restore-optimizer-from-ckpt \
25+
--hybrid-override-pattern="M-*" \
26+
--num-layers=3 \
2727
--tensor-parallel-size=2 \
2828
--pipeline-model-parallel-size=1 \
2929
--context-parallel-size=1 \
3030
--global-batch-size=8 \
3131
--micro-batch-size=1 \
32-
--model-size="8B" \
32+
--model-size="4B" \
3333
--clip-grad 1 \
3434
--lr=0.0003 \
3535
--warmup-steps=0 \

tests/functional_tests/L2_NeMo_2_SSM_Pretraining.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,14 @@ coverage run -a --data-file=/workspace/.coverage --source=/workspace/nemo tests/
2020
--vocab-file="/home/TestData/nlp/megatron_mamba/nm5_tokenizer/multiMixV8.gpt4o_nc_sd.500000.128k.vocab.json" \
2121
--dataset-dir="/home/TestData/nlp/megatron_mamba/toy_ssm_dataset/legal_pile_text_document" \
2222
--seq-length=512 \
23-
--hybrid-override-pattern="M-M*" \
24-
--num-layers=4 \
23+
--hybrid-override-pattern="M-*" \
24+
--num-layers=3 \
2525
--tensor-parallel-size=1 \
2626
--pipeline-model-parallel-size=1 \
2727
--context-parallel-size=1 \
2828
--global-batch-size=8 \
2929
--micro-batch-size=1 \
30-
--model-size="8B" \
30+
--model-size="4B" \
3131
--clip-grad 1 \
3232
--lr=0.0003 \
3333
--warmup-steps=0 \

0 commit comments

Comments
 (0)