24 | 24 | from nemo.collections.avlm import AVLMMockDataModule |
25 | 25 | from nemo.collections.common.tokenizers.huggingface.auto_tokenizer import AutoTokenizer |
26 | 26 | from nemo.collections.llm.peft import LoRA |
27 | | -from nemo.collections.llm.recipes.log.default import tensorboard_logger |
| 27 | +from nemo.collections.llm.recipes.finetune_default import nemo_resume |
| 28 | +from nemo.collections.llm.recipes.log.default import default_resume, tensorboard_logger |
28 | 29 | from nemo.collections.llm.recipes.optim.adam import distributed_fused_adam_with_cosine_annealing |
29 | 30 | from nemo.collections.llm.recipes.precision.mixed_precision import bf16_mixed |
30 | 31 | from nemo.utils.exp_manager import TimingCallback |
@@ -69,7 +70,13 @@ def finetune_recipe( |
69 | 70 | num_nodes: int = 1, |
70 | 71 | num_gpus_per_node: int = 8, |
71 | 72 | peft_scheme: Optional[str] = 'none', |
72 | | - freeze_modules: Optional[dict] = None, |
| 73 | + freeze_modules: Optional[dict] = { |
| 74 | + "freeze_language_model": False, |
| 75 | + "freeze_vision_model": True, |
| 76 | + "freeze_audio_model": True, |
| 77 | + "freeze_vision_projection": False, |
| 78 | + "freeze_audio_projection": False, |
| 79 | + }, |
73 | 80 | ) -> run.Partial: |
74 | 81 | """ |
75 | 82 | Create a fine-tuning recipe for AVLM 8B model. |
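For illustration, a minimal sketch of how a caller might use the updated signature, overriding the new `freeze_modules` default; the import path, recipe name, and argument values are assumptions, not part of this change:

```python
import nemo_run as run

# Assumed location of this recipe module; adjust to the actual path in the repo.
from nemo.collections.avlm.recipes.avlm_8b import finetune_recipe

# Build the fine-tuning recipe, additionally unfreezing the vision encoder
# (the default introduced above keeps it frozen).
recipe: run.Partial = finetune_recipe(
    name="avlm_8b_finetune",
    num_nodes=1,
    num_gpus_per_node=8,
    peft_scheme="none",
    freeze_modules={
        "freeze_language_model": False,
        "freeze_vision_model": False,
        "freeze_audio_model": True,
        "freeze_vision_projection": False,
        "freeze_audio_projection": False,
    },
)
```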
@@ -102,6 +109,7 @@ def finetune_recipe( |
102 | 109 | pipeline_model_parallel_size=1, |
103 | 110 | encoder_pipeline_model_parallel_size=0, |
104 | 111 | pipeline_dtype=torch.bfloat16, |
| 112 | + ckpt_async_save=False, |
105 | 113 | ) |
106 | 114 |
107 | 115 | trainer = run.Config( |
@@ -143,10 +151,7 @@ def finetune_recipe( |
143 | 151 | ), |
144 | 152 | log=llm.default_log(dir=dir, name=name, tensorboard_logger=tensorboard_logger(name=name)), |
145 | 153 | optim=distributed_fused_adam_with_cosine_annealing(max_lr=2.0e-05, min_lr=2.0e-07, warmup_steps=150), |
146 | | - resume=run.Config( |
147 | | - nl.AutoResume, |
148 | | - restore_config=run.Config(nl.RestoreConfig, path=checkpoint_path), |
149 | | - ), |
| 154 | + resume=nemo_resume(checkpoint_path), |
150 | 155 | ) |
151 | 156 |
152 | 157 | if peft_scheme is None or peft_scheme.lower() == 'none': |
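For context, `nemo_resume` stands in for the inline `AutoResume` config removed above; a rough equivalent sketch based on those removed lines (the helper's exact internals are an assumption):

```python
import nemo_run as run
from nemo import lightning as nl

def nemo_resume_sketch(checkpoint_path: str) -> run.Config:
    # Hypothetical stand-in mirroring the config that resume=nemo_resume(checkpoint_path)
    # replaces in this change: restore weights from an existing checkpoint path.
    return run.Config(
        nl.AutoResume,
        restore_config=run.Config(nl.RestoreConfig, path=checkpoint_path),
    )
```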
@@ -178,7 +183,13 @@ def pretrain_recipe( |
178 | 183 | num_nodes: int = 1, |
179 | 184 | num_gpus_per_node: int = 8, |
180 | 185 | language_model_from_pretrained: Optional[str] = None, |
181 | | - freeze_modules: Optional[dict] = None, |
| 186 | + freeze_modules: Optional[dict] = { |
| 187 | + "freeze_language_model": True, |
| 188 | + "freeze_vision_model": True, |
| 189 | + "freeze_audio_model": True, |
| 190 | + "freeze_vision_projection": False, |
| 191 | + "freeze_audio_projection": False, |
| 192 | + }, |
182 | 193 | ) -> run.Partial: |
183 | 194 | """ |
184 | 195 | Create a Pre-training recipe for AVLM 8B model. |
@@ -210,6 +221,7 @@ def pretrain_recipe( |
210 | 221 | pipeline_model_parallel_size=1, |
211 | 222 | encoder_pipeline_model_parallel_size=0, |
212 | 223 | pipeline_dtype=torch.bfloat16, |
| 224 | + ckpt_async_save=False, |
213 | 225 | ) |
214 | 226 |
215 | 227 | trainer = run.Config( |
@@ -252,6 +264,7 @@ def pretrain_recipe( |
252 | 264 | ), |
253 | 265 | log=llm.default_log(dir=dir, name=name, tensorboard_logger=tensorboard_logger(name=name)), |
254 | 266 | optim=distributed_fused_adam_with_cosine_annealing(max_lr=0.001, min_lr=2.0e-05, warmup_steps=150), |
| 267 | + resume=default_resume(), |
255 | 268 | ) |
256 | 269 |
257 | 270 | return recipe |
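Finally, a minimal usage sketch for the pre-training recipe with its new defaults; the import path is assumed, and `default_resume()` is taken to provide the recipes' standard auto-resume behaviour so interrupted runs pick up from the experiment directory:

```python
import nemo_run as run

# Assumed location of this recipe module; adjust to the actual path in the repo.
from nemo.collections.avlm.recipes.avlm_8b import pretrain_recipe

# Stage-1 style pre-training: with the defaults above, only the vision and audio
# projections are trainable while the language, vision, and audio backbones stay frozen.
recipe: run.Partial = pretrain_recipe(
    name="avlm_8b_pretrain",
    num_nodes=1,
    num_gpus_per_node=8,
)

# The result is a nemo_run Partial, so individual fields can still be overridden
# before launch, e.g. re-enabling asynchronous checkpoint saving:
recipe.trainer.strategy.ckpt_async_save = True
```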