NVIDIA-NeMo
diff --git a/‎.coveragerc‎
Lines changed: 5 additions & 1 deletion b/‎.coveragerc‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎.github/workflows/cicd-main-automodel.yml‎
Lines changed: 1 addition & 0 deletions b/‎.github/workflows/cicd-main-automodel.yml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.github/workflows/cicd-main-export-deploy.yml‎
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/cicd-main-export-deploy.yml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎.github/workflows/cicd-main-speech.yml‎
Lines changed: 1 addition & 0 deletions b/‎.github/workflows/cicd-main-speech.yml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.github/workflows/cicd-main.yml‎
Lines changed: 10 additions & 10 deletions b/‎.github/workflows/cicd-main.yml‎
Lines changed: 10 additions & 10 deletions
diff --git a/‎CHANGELOG.md‎
Lines changed: 4 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎docker/Dockerfile.ci.export_deploy‎
Lines changed: 29 additions & 28 deletions b/‎docker/Dockerfile.ci.export_deploy‎
Lines changed: 29 additions & 28 deletions
diff --git a/‎docker/common/install_dep.sh‎
Lines changed: 13 additions & 15 deletions b/‎docker/common/install_dep.sh‎
Lines changed: 13 additions & 15 deletions
diff --git a/‎examples/nlp/duplex_text_normalization/README.md‎
Lines changed: 0 additions & 2 deletions b/‎examples/nlp/duplex_text_normalization/README.md‎
Lines changed: 0 additions & 2 deletions
@@ -22,7 +22,11 @@ omit =
     nemo/collections/audio/parts/utils/maxine.py
 
     nemo/core/*
-    nemo/collections/common/*      
+    nemo/collections/common/*
+
+    /workspace/config-3.12.py
+    /workspace/config-3.py
+    /workspace/config.py
 
 [paths]
 source =
 
@@ -70,6 +70,7 @@ jobs:
             script: L2_VLM_HF_Transformer_PEFT_FSDP2
           - runner: self-hosted-azure-gpus-1
             script: L2_VLM_HF_Transformer_PEFT_4bit
+            is-optional: true
           - runner: self-hosted-azure
             script: L2_VLM_HF_Transformer_SFT_FSDP2
           - runner: self-hosted-azure
 
@@ -96,7 +96,7 @@ jobs:
             runner: self-hosted-azure-gpus-1
     needs: [unit-tests]
     runs-on: ${{ matrix.runner }}
-    name: ${{ matrix.is_optional && 'PLEASEFIXME_' || '' }}${{ matrix.script }}
+    name: ${{ matrix.is-optional && 'PLEASEFIXME_' || '' }}${{ matrix.script }}
     steps:
       - name: Checkout
         uses: actions/checkout@v4
@@ -109,4 +109,4 @@ jobs:
           script: ${{ matrix.script }}
           tests_to_run: ${{ inputs.test_to_run }}
           image: ${{ inputs.image-name }}
-          is_optional: ${{ matrix.is_optional || false }}
+          is_optional: ${{ matrix.is-optional || false }}
@@ -120,6 +120,7 @@ jobs:
             script: L2_Speech_Batch_Size_OOMptimizer
           - runner: self-hosted-azure
             script: Optional_L2_Speech_Batch_Size_OOMptimizer_Canary
+            is-optional: true
           - runner: self-hosted-azure
             script: L2_Speech_Transcription_Speech_to_Text_Transcribe
           - runner: self-hosted-azure
 
@@ -135,7 +135,7 @@ jobs:
     runs-on: ubuntu-latest
     environment: test
     if: |
-      needs.pre-flight.outputs.test_to_run != '[]' 
+      needs.pre-flight.outputs.test_to_run != '[]'
       && needs.pre-flight.outputs.components_to_run != '[]'
       && needs.pre-flight.outputs.is_ci_workload == 'false'
     steps:
@@ -147,10 +147,10 @@ jobs:
     uses: ./.github/workflows/_build_container.yml
     needs: [pre-flight, code-linting, cicd-wait-in-queue]
     if: |
-      needs.pre-flight.outputs.test_to_run != '[]' 
+      needs.pre-flight.outputs.test_to_run != '[]'
       && needs.pre-flight.outputs.components_to_run != '[]'
       && (
-        success() 
+        success()
         || (
           needs.cicd-wait-in-queue.result == 'skipped'
           && needs.pre-flight.outputs.is_ci_workload == 'true'
@@ -385,8 +385,8 @@ jobs:
 
       - name: Remove label if not cancelled
         if: |
-          steps.result.outputs.code != 'cancelled' 
-          && github.event.label.name == 'Run CICD' 
+          steps.result.outputs.code != 'cancelled'
+          && github.event.label.name == 'Run CICD'
           && github.event.pull_request.head.repo.full_name == github.repository
         env:
           GH_TOKEN: ${{ github.token }}
@@ -395,8 +395,8 @@ jobs:
 
       - name: Pipeline successful, add PR comment
         if: |
-          steps.result.outputs.code == 'success' 
-          && github.event_name == 'pull_request' 
+          steps.result.outputs.code == 'success'
+          && github.event_name == 'pull_request'
           && env.SLACK_WEBHOOK != ''
         uses: peter-evans/create-or-update-comment@v4
         env:
@@ -416,8 +416,8 @@ jobs:
 
       - name: "Pipeline not successful and not cancelled: Send Slack alert & create step summary"
         if: |
-          steps.result.outputs.code == 'failure' 
-          && github.event.label.name == 'Run CICD' 
+          steps.result.outputs.code == 'failure'
+          && github.event.label.name == 'Run CICD'
           && env.SLACK_WEBHOOK != ''
         env:
           SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
@@ -451,7 +451,7 @@ jobs:
       needs.pre-flight.outputs.test_to_run != '[]'
       && needs.pre-flight.outputs.components_to_run != '[]'
       && (
-        success() 
+        success()
         || needs.Nemo_CICD_Test.result == 'success'
       )
       && !cancelled()
 
@@ -1,6 +1,10 @@
 # Changelog
 
 <!-- Next changelog -->
+## NVIDIA Neural Modules 2.3.2
+
+This release addresses known security issues. For the latest NVIDIA Vulnerability Disclosure Information visit https://www.nvidia.com/en-us/security/, for acknowledgement please reach out to the NVIDIA PSIRT team at psirt@nvidia.com
+
 ## NVIDIA Neural Modules 2.3.1
 
 ### Highlights
 
@@ -32,64 +32,65 @@ apt-get install -y bc
 apt-get clean
 EOF
 
-WORKDIR /tmp/NeMo
+WORKDIR /opt/NeMo
 ARG TRTLLM_REPO
 ARG TRTLLM_TAG
-RUN --mount=type=bind,source=docker/common/install_dep.sh,target=/tmp/NeMo/install_dep.sh bash -ex <<"EOF"
+RUN --mount=type=bind,source=docker/common/install_dep.sh,target=/opt/NeMo/install_dep.sh \
+  --mount=type=bind,source=external/patches,target=/opt/NeMo/external/patches bash -ex <<"EOF"
 
-  bash /tmp/NeMo/install_dep.sh --library trt --mode install
+  bash /opt/NeMo/install_dep.sh --library trt --mode install
 EOF
 
 FROM base-image AS trt-llm-wheel
-WORKDIR /tmp/NeMo
+WORKDIR /opt/NeMo
 ARG TRTLLM_REPO
 ARG TRTLLM_TAG
-RUN --mount=type=bind,source=docker/common/install_dep.sh,target=/tmp/NeMo/install_dep.sh bash -ex <<"EOF"
+RUN --mount=type=bind,source=docker/common/install_dep.sh,target=/opt/NeMo/install_dep.sh \
+  --mount=type=bind,source=external/patches,target=/opt/NeMo/external/patches bash -ex <<"EOF"
 
-  bash /tmp/NeMo/install_dep.sh --library trtllm --mode build
+  bash /opt/NeMo/install_dep.sh --library trtllm --mode build
 EOF
 
 FROM base-image as te-wheel
-WORKDIR /tmp/NeMo
+WORKDIR /opt/NeMo
 ARG TE_REPO
 ARG TE_TAG
-RUN --mount=type=bind,source=docker/common/install_dep.sh,target=/tmp/NeMo/install_dep.sh \
-  --mount=type=bind,source=external/patches,target=/tmp/NeMo/external/patches bash -ex <<"EOF"
+RUN --mount=type=bind,source=docker/common/install_dep.sh,target=/opt/NeMo/install_dep.sh \
+  --mount=type=bind,source=external/patches,target=/opt/NeMo/external/patches bash -ex <<"EOF"
 
-  bash /tmp/NeMo/install_dep.sh --library te --mode build
-  ls -al /tmp/Megatron-LM || true
+  bash /opt/NeMo/install_dep.sh --library te --mode build
+  ls -al /opt/Megatron-LM || true
 EOF
 
 FROM base-image as mcore-wheel
-WORKDIR /tmp/NeMo
+WORKDIR /opt/NeMo
 ARG MLM_REPO
 ARG MLM_TAG
-RUN --mount=type=bind,source=docker/common/install_dep.sh,target=/tmp/NeMo/install_dep.sh bash -ex <<"EOF"
+RUN --mount=type=bind,source=docker/common/install_dep.sh,target=/opt/NeMo/install_dep.sh bash -ex <<"EOF"
 
-  bash /tmp/NeMo/install_dep.sh --library mcore --mode build
-  ls -al /tmp/Megatron-LM || true
+  bash /opt/NeMo/install_dep.sh --library mcore --mode build
+  ls -al /opt/Megatron-LM || true
 EOF
 
 FROM base-image
-WORKDIR /tmp/NeMo
+WORKDIR /opt/NeMo
 ENV INSTALL_DIR="/opt"
 RUN \
   --mount=type=bind,from=trt-llm-wheel,source=/opt/wheels/trtllm,target=/opt/wheels/trtllm \
   --mount=type=bind,from=te-wheel,source=/opt/wheels/te,target=/opt/wheels/te \
   --mount=type=bind,from=mcore-wheel,source=/opt/wheels/mcore,target=/opt/wheels/mcore \
-  --mount=type=bind,source=requirements,target=/tmp/NeMo/requirements \
-  --mount=type=bind,source=tools/ctc_segmentation/requirements.txt,target=/tmp/NeMo/tools/ctc_segmentation/requirements.txt \
-  --mount=type=bind,source=docker/common/install_dep.sh,target=/tmp/NeMo/install_dep.sh \
-  --mount=type=bind,source=setup.py,target=/tmp/NeMo/setup.py \
-  --mount=type=bind,source=external/patches,target=/tmp/NeMo/external/patches \
-  --mount=type=bind,source=README.md,target=/tmp/NeMo/README.md \
-  --mount=type=bind,source=nemo/package_info.py,target=/tmp/NeMo/nemo/package_info.py \
-  --mount=type=bind,source=nemo/__init__.py,target=/tmp/NeMo/nemo/__init__.py bash -ex <<"EOF"
-
-    bash /tmp/NeMo/install_dep.sh --library all --mode install
+  --mount=type=bind,source=requirements,target=/opt/NeMo/requirements \
+  --mount=type=bind,source=tools/ctc_segmentation/requirements.txt,target=/opt/NeMo/tools/ctc_segmentation/requirements.txt \
+  --mount=type=bind,source=docker/common/install_dep.sh,target=/opt/NeMo/install_dep.sh \
+  --mount=type=bind,source=setup.py,target=/opt/NeMo/setup.py \
+  --mount=type=bind,source=external/patches,target=/opt/NeMo/external/patches \
+  --mount=type=bind,source=README.md,target=/opt/NeMo/README.md \
+  --mount=type=bind,source=nemo/package_info.py,target=/opt/NeMo/nemo/package_info.py \
+  --mount=type=bind,source=nemo/__init__.py,target=/opt/NeMo/nemo/__init__.py bash -ex <<"EOF"
+
+    bash /opt/NeMo/install_dep.sh --library all --mode install
     pip install --no-cache-dir ".[deploy,test]"
-    rm -rf $NEMO_DIR || true
-    
+
 EOF
 
 WORKDIR /workspace
 
@@ -69,6 +69,7 @@ trt() {
   git submodule update --init --recursive
   sed -i "/torch/d" requirements.txt
   git lfs pull
+  patch -p1 < $CURR/external/patches/trt_llm.patch
   popd
 
   if [[ "$mode" == "install" ]]; then
@@ -81,11 +82,12 @@ trt() {
       bash docker/common/install_ccache.sh
 
       . docker/common/install_tensorrt.sh \
-        --TRT_VER="10.9.0.34" \
-        --CUDA_VER="12.8" \
-        --CUDNN_VER="9.8.0.87-1" \
-        --NCCL_VER="2.25.1-1+cuda12.8" \
-        --CUBLAS_VER="12.8.4.1-1"
+        --TRT_VER="10.10.0.31" \
+        --CUDA_VER="12.9" \
+        --CUDNN_VER="9.9.0.52-1" \
+        --NCCL_VER="2.26.5-1+cuda12.9" \
+        --CUBLAS_VER="12.9.0.13-1" \
+        --NVRTC_VER="12.9.41-1"
       set -u
     fi
   fi
@@ -133,12 +135,15 @@ trtllm() {
   git submodule update --init --recursive
   sed -i "/torch/d" requirements.txt
   git lfs pull
+  patch -p1 < $CURR/external/patches/trt_llm.patch
   popd
 
   build() {
     if [[ "${NVIDIA_PYTORCH_VERSION}" != "" ]]; then
+      # CONDA_PREFIX causes an error in trt-llm's build script
+      unset CONDA_PREFIX
       cd $TRTLLM_DIR
-      python3 ./scripts/build_wheel.py --job_count $(nproc) --trt_root /usr/local/tensorrt --dist_dir $WHEELS_DIR --python_bindings --benchmarks
+      TORCH_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=1" python3 ./scripts/build_wheel.py --job_count $(nproc) --clean --trt_root /usr/local/tensorrt --dist_dir $WHEELS_DIR --python_bindings --benchmarks
     fi
   }
 
@@ -149,8 +154,7 @@ trtllm() {
       build
     fi
 
-    pip install --no-cache-dir $WHEELS_DIR/tensorrt_llm*.whl --extra-index-url https://pypi.nvidia.com &&
-      sed -i '57d' /usr/local/lib/python3.12/dist-packages/torch_tensorrt/dynamo/conversion/custom_ops_converters.py || true
+    pip install --no-cache-dir $WHEELS_DIR/tensorrt_llm*.whl --extra-index-url https://pypi.nvidia.com || true
   fi
 }
 
@@ -167,15 +171,14 @@ te() {
   fi
   pushd $TE_DIR
   git checkout -f $TE_TAG
-  patch -p1 </$CURR/external/patches/nemo_2.3.0_te.patch
   popd
 
   build() {
     if [[ "${NVIDIA_PYTORCH_VERSION}" != "" ]]; then
       cd $TE_DIR
       git submodule init
       git submodule update
-      pip wheel --wheel-dir $WHEELS_DIR/ $TE_DIR
+      pip wheel --wheel-dir $WHEELS_DIR/  --no-build-isolation $TE_DIR
     fi
   }
 
@@ -308,11 +311,6 @@ extra() {
       "git+https://github.com/NVIDIA/nvidia-resiliency-ext.git@b6eb61dbf9fe272b1a943b1b0d9efdde99df0737 ; platform_machine == 'x86_64'" # Compiling NvRX requires CUDA
     )
   fi
-  if [[ "${NVIDIA_PYTORCH_VERSION}" != "" ]]; then
-    patch \
-      /usr/local/lib/python3.12/dist-packages/torch/accelerator/__init__.py \
-      /$CURR/external/patches/torch_accelerator_144567_fix.patch
-  fi
 
   if [[ "$mode" == "install" ]]; then
     pip install --force-reinstall --no-deps --no-cache-dir "${DEPS[@]}"