From 3249e25ad818cb254db7835bb9f6c133bded5197 Mon Sep 17 00:00:00 2001 From: Onur Yilmaz Date: Wed, 16 Jul 2025 17:34:56 -0400 Subject: [PATCH 1/6] Moving security fixes over here Signed-off-by: Onur Yilmaz --- nemo/export/tarutils.py | 10 ---- nemo/export/tensorrt_mm_exporter.py | 4 +- .../trt_llm/qnemo/qnemo_to_tensorrt_llm.py | 47 ++++++++++--------- 3 files changed, 25 insertions(+), 36 deletions(-) diff --git a/nemo/export/tarutils.py b/nemo/export/tarutils.py index 3b1f17780f1c..6367fa051db3 100644 --- a/nemo/export/tarutils.py +++ b/nemo/export/tarutils.py @@ -265,13 +265,3 @@ def keys(self): return self._path.iterdir() -def unpack_tarball(archive: str, dest_dir: str): - """ - Unpacks a tarball into a destination directory. - - Args: - archive (str): The path to the tarball. - dest_dir (str): The path to the destination directory. - """ - with tarfile.open(archive, mode="r") as tar: - tar.extractall(path=dest_dir) diff --git a/nemo/export/tensorrt_mm_exporter.py b/nemo/export/tensorrt_mm_exporter.py index 21a2a244d0d0..54914846fa79 100644 --- a/nemo/export/tensorrt_mm_exporter.py +++ b/nemo/export/tensorrt_mm_exporter.py @@ -32,7 +32,6 @@ extract_lora_ckpt, ) from nemo.export.multimodal.run import MultimodalModelRunner, SpeechllmModelRunner -from nemo.export.tarutils import unpack_tarball use_deploy = True try: @@ -152,8 +151,7 @@ def export( if os.path.isdir(lora_checkpoint_path): lora_dir = lora_checkpoint_path else: - lora_dir = os.path.join(tmp_dir.name, "unpacked_lora") - unpack_tarball(lora_checkpoint_path, lora_dir) + raise ValueError("lora_checkpoint_path in nemo1 is not supported. It must be a directory") llm_lora_path = [extract_lora_ckpt(lora_dir, tmp_dir.name)] else: diff --git a/nemo/export/trt_llm/qnemo/qnemo_to_tensorrt_llm.py b/nemo/export/trt_llm/qnemo/qnemo_to_tensorrt_llm.py index 6610fa8b283c..875e31486ef7 100644 --- a/nemo/export/trt_llm/qnemo/qnemo_to_tensorrt_llm.py +++ b/nemo/export/trt_llm/qnemo/qnemo_to_tensorrt_llm.py @@ -13,6 +13,7 @@ # limitations under the License. import glob +import itertools import os import subprocess import warnings @@ -78,42 +79,42 @@ def qnemo_to_tensorrt_llm( speculative_decoding_mode = "medusa" if "Medusa" in config.architecture else None - build_cmd = "trtllm-build " - build_cmd += f"--checkpoint_dir {nemo_checkpoint_path} " - build_cmd += f"--log_level {log_level} " - build_cmd += f"--output_dir {engine_dir} " - build_cmd += f"--workers {num_build_workers} " - build_cmd += f"--max_batch_size {max_batch_size} " - build_cmd += f"--max_input_len {max_input_len} " - build_cmd += f"--max_beam_width {max_beam_width} " - build_cmd += f"--max_prompt_embedding_table_size {max_prompt_embedding_table_size} " - build_cmd += f"--paged_kv_cache {'enable' if paged_kv_cache else 'disable'} " - build_cmd += f"--use_paged_context_fmha {'enable' if paged_context_fmha else 'disable'} " - build_cmd += f"--remove_input_padding {'enable' if remove_input_padding else 'disable'} " - build_cmd += f"--multiple_profiles {'enable' if multiple_profiles else 'disable'} " - build_cmd += f"--reduce_fusion {'enable' if reduce_fusion else 'disable'} " - build_cmd += f"--use_fused_mlp {'enable' if use_fused_mlp else 'disable'} " + build_cmd = ["trtllm-build"] + build_cmd.extend(["--checkpoint_dir", nemo_checkpoint_path]) + build_cmd.extend(["--log_level", log_level]) + build_cmd.extend(["--output_dir", engine_dir]) + build_cmd.extend(["--workers", str(num_build_workers)]) + build_cmd.extend(["--max_batch_size", str(max_batch_size)]) + build_cmd.extend(["--max_input_len", str(max_input_len)]) + build_cmd.extend(["--max_beam_width", str(max_beam_width)]) + build_cmd.extend(["--max_prompt_embedding_table_size", str(max_prompt_embedding_table_size)]) + build_cmd.extend(["--paged_kv_cache", "enable" if paged_kv_cache else "disable"]) + build_cmd.extend(["--use_paged_context_fmha", "enable" if paged_context_fmha else "disable"]) + build_cmd.extend(["--remove_input_padding", "enable" if remove_input_padding else "disable"]) + build_cmd.extend(["--multiple_profiles", "enable" if multiple_profiles else "disable"]) + build_cmd.extend(["--reduce_fusion", "enable" if reduce_fusion else "disable"]) + build_cmd.extend(["--use_fused_mlp", "enable" if use_fused_mlp else "disable"]) if not use_qdq: - build_cmd += "--gemm_plugin auto " + build_cmd.extend(["--gemm_plugin", "auto"]) if max_seq_len is not None: - build_cmd += f"--max_seq_len {max_seq_len} " + build_cmd.extend(["--max_seq_len", str(max_seq_len)]) if max_num_tokens is not None: - build_cmd += f"--max_num_tokens {max_num_tokens} " + build_cmd.extend(["--max_num_tokens", str(max_num_tokens)]) else: - build_cmd += f"--max_num_tokens {max_batch_size * max_input_len} " + build_cmd.extend(["--max_num_tokens", str(max_batch_size * max_input_len)]) if opt_num_tokens is not None: - build_cmd += f"--opt_num_tokens {opt_num_tokens} " + build_cmd.extend(["--opt_num_tokens", str(opt_num_tokens)]) if speculative_decoding_mode: - build_cmd += f"--speculative_decoding_mode {speculative_decoding_mode} " + build_cmd.extend(["--speculative_decoding_mode", speculative_decoding_mode]) build_cmd = build_cmd.replace("--", "\\\n --") # Separate parameters line by line print("trtllm-build command:") - print(build_cmd) + print("".join(itertools.chain.from_iterable(zip(build_cmd, itertools.cycle(["\n ", " "])))).strip()) - subprocess.run(build_cmd, shell=True, check=True) + subprocess.run(build_cmd, shell=False, check=True) From dd31aad7da4c1b1963d36757444f945b949938a8 Mon Sep 17 00:00:00 2001 From: oyilmaz-nvidia Date: Wed, 16 Jul 2025 21:36:53 +0000 Subject: [PATCH 2/6] Apply isort and black reformatting Signed-off-by: oyilmaz-nvidia --- nemo/export/tarutils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/nemo/export/tarutils.py b/nemo/export/tarutils.py index 6367fa051db3..40add3162db6 100644 --- a/nemo/export/tarutils.py +++ b/nemo/export/tarutils.py @@ -263,5 +263,3 @@ def keys(self): Returns an iterator over the keys in the store. """ return self._path.iterdir() - - From 94b7c7978b0f476ed2f7ed1cf1eaf8dda9ea0064 Mon Sep 17 00:00:00 2001 From: Onur Yilmaz Date: Wed, 16 Jul 2025 17:40:39 -0400 Subject: [PATCH 3/6] Removing another line Signed-off-by: Onur Yilmaz --- nemo/export/trt_llm/qnemo/qnemo_to_tensorrt_llm.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/nemo/export/trt_llm/qnemo/qnemo_to_tensorrt_llm.py b/nemo/export/trt_llm/qnemo/qnemo_to_tensorrt_llm.py index 875e31486ef7..003d1aba2a2c 100644 --- a/nemo/export/trt_llm/qnemo/qnemo_to_tensorrt_llm.py +++ b/nemo/export/trt_llm/qnemo/qnemo_to_tensorrt_llm.py @@ -112,8 +112,6 @@ def qnemo_to_tensorrt_llm( if speculative_decoding_mode: build_cmd.extend(["--speculative_decoding_mode", speculative_decoding_mode]) - build_cmd = build_cmd.replace("--", "\\\n --") # Separate parameters line by line - print("trtllm-build command:") print("".join(itertools.chain.from_iterable(zip(build_cmd, itertools.cycle(["\n ", " "])))).strip()) From e565a680b924cd83bc6753511f9059fa1cd8e387 Mon Sep 17 00:00:00 2001 From: Charlie Truong Date: Wed, 16 Jul 2025 21:17:59 -0500 Subject: [PATCH 4/6] Remove unpack_tarball from tensorrt llm export Signed-off-by: Charlie Truong --- nemo/export/tensorrt_llm.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/nemo/export/tensorrt_llm.py b/nemo/export/tensorrt_llm.py index 35d769e596be..a19d342713b7 100644 --- a/nemo/export/tensorrt_llm.py +++ b/nemo/export/tensorrt_llm.py @@ -78,7 +78,7 @@ from transformers import PreTrainedTokenizerBase from nemo.deploy import ITritonDeployable -from nemo.export.tarutils import TarPath, unpack_tarball +from nemo.export.tarutils import TarPath from nemo.export.trt_llm.converter.model_converter import determine_quantization_settings, model_to_trtllm_ckpt from nemo.export.trt_llm.converter.model_to_trt_llm_ckpt import dist_model_to_trt_llm_ckpt, get_layer_prefix from nemo.export.trt_llm.converter.utils import init_model_parallel_from_nemo @@ -326,8 +326,7 @@ def export( if os.path.isdir(nemo_checkpoint_path): nemo_export_dir = nemo_checkpoint_path else: - unpack_tarball(nemo_checkpoint_path, tmp_dir.name) - nemo_checkpoint_path = tmp_dir.name + raise ValueError("Checkpoint path must be a directory") if os.path.exists(os.path.join(nemo_checkpoint_path, TOKENIZER_CONFIG_FILE)): # Instantiate tokenizer for a legacy "Nemo 1" quantized checkpoint from a tokenizer config. From 87b472838690bbf3789cff38f22c6b3b9cc0d9d5 Mon Sep 17 00:00:00 2001 From: Charlie Truong Date: Wed, 16 Jul 2025 21:19:32 -0500 Subject: [PATCH 5/6] Set export deploy gpu unit tests as optional Signed-off-by: Charlie Truong --- .github/workflows/cicd-main-export-deploy.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/cicd-main-export-deploy.yml b/.github/workflows/cicd-main-export-deploy.yml index f1a35a3d1dbc..9195989a87ae 100644 --- a/.github/workflows/cicd-main-export-deploy.yml +++ b/.github/workflows/cicd-main-export-deploy.yml @@ -37,6 +37,7 @@ jobs: include: - script: L0_Unit_Tests_GPU_Export_Deploy runner: self-hosted-azure + is-optional: true - script: L0_Unit_Tests_CPU_Export_Deploy runner: self-hosted-azure-cpu cpu-only: true From 6891a86473cd0eaf31903c4a40670680535e6b9d Mon Sep 17 00:00:00 2001 From: Charlie Truong Date: Wed, 16 Jul 2025 23:19:30 -0500 Subject: [PATCH 6/6] Fix setting gpu export unit tests as optional Signed-off-by: Charlie Truong --- .github/workflows/cicd-main-export-deploy.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cicd-main-export-deploy.yml b/.github/workflows/cicd-main-export-deploy.yml index 9195989a87ae..088ba639eb11 100644 --- a/.github/workflows/cicd-main-export-deploy.yml +++ b/.github/workflows/cicd-main-export-deploy.yml @@ -49,7 +49,7 @@ jobs: runner: self-hosted-azure needs: [build] runs-on: ${{ matrix.runner }} - name: ${{ matrix.script }} + name: ${{ matrix.is-optional && 'PLEASEFIXME_' || '' }}${{ matrix.script }} steps: - name: Checkout uses: actions/checkout@v4 @@ -64,6 +64,7 @@ jobs: tests_to_run: ${{ inputs.test_to_run }} image: ${{ inputs.image-name }} cpu-only: ${{ matrix.cpu-only || false }} + is_optional: ${{ matrix.is-optional || false }} e2e-tests: strategy: fail-fast: false