Skip to content

Commit 8fecd1c

Browse files
oyilmaz-nvidia and chtruong814
authored and committed
Moving export security fixes over here (NVIDIA-NeMo#14254)
* Moving security fixes over here Signed-off-by: Onur Yilmaz <[email protected]> * Apply isort and black reformatting Signed-off-by: oyilmaz-nvidia <[email protected]> * Removing another line Signed-off-by: Onur Yilmaz <[email protected]> * Remove unpack_tarball from tensorrt llm export Signed-off-by: Charlie Truong <[email protected]> * Set export deploy gpu unit tests as optional Signed-off-by: Charlie Truong <[email protected]> * Fix setting gpu export unit tests as optional Signed-off-by: Charlie Truong <[email protected]> --------- Signed-off-by: Onur Yilmaz <[email protected]> Signed-off-by: oyilmaz-nvidia <[email protected]> Signed-off-by: Charlie Truong <[email protected]> Co-authored-by: oyilmaz-nvidia <[email protected]> Co-authored-by: Charlie Truong <[email protected]>
1 parent 00ea2d8 commit 8fecd1c

File tree

5 files changed

+30
-44
lines changed

5 files changed

+30
-44
lines changed

.github/workflows/cicd-main-export-deploy.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ jobs:
3737
include:
3838
- script: L0_Unit_Tests_GPU_Export_Deploy
3939
runner: self-hosted-azure
40+
is-optional: true
4041
- script: L0_Unit_Tests_CPU_Export_Deploy
4142
runner: self-hosted-azure-cpu
4243
cpu-only: true
@@ -48,7 +49,7 @@ jobs:
4849
runner: self-hosted-azure
4950
needs: [build]
5051
runs-on: ${{ matrix.runner }}
51-
name: ${{ matrix.script }}
52+
name: ${{ matrix.is-optional && 'PLEASEFIXME_' || '' }}${{ matrix.script }}
5253
steps:
5354
- name: Checkout
5455
uses: actions/checkout@v4
@@ -63,6 +64,7 @@ jobs:
6364
tests_to_run: ${{ inputs.test_to_run }}
6465
image: ${{ inputs.image-name }}
6566
cpu-only: ${{ matrix.cpu-only || false }}
67+
is_optional: ${{ matrix.is-optional || false }}
6668
e2e-tests:
6769
strategy:
6870
fail-fast: false

nemo/export/tarutils.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -263,15 +263,3 @@ def keys(self):
263263
Returns an iterator over the keys in the store.
264264
"""
265265
return self._path.iterdir()
266-
267-
268-
def unpack_tarball(archive: str, dest_dir: str):
269-
"""
270-
Unpacks a tarball into a destination directory.
271-
272-
Args:
273-
archive (str): The path to the tarball.
274-
dest_dir (str): The path to the destination directory.
275-
"""
276-
with tarfile.open(archive, mode="r") as tar:
277-
tar.extractall(path=dest_dir)

nemo/export/tensorrt_llm.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@
7878
from transformers import PreTrainedTokenizerBase
7979

8080
from nemo.deploy import ITritonDeployable
81-
from nemo.export.tarutils import TarPath, unpack_tarball
81+
from nemo.export.tarutils import TarPath
8282
from nemo.export.trt_llm.converter.model_converter import determine_quantization_settings, model_to_trtllm_ckpt
8383
from nemo.export.trt_llm.converter.model_to_trt_llm_ckpt import dist_model_to_trt_llm_ckpt, get_layer_prefix
8484
from nemo.export.trt_llm.converter.utils import init_model_parallel_from_nemo
@@ -326,8 +326,7 @@ def export(
326326
if os.path.isdir(nemo_checkpoint_path):
327327
nemo_export_dir = nemo_checkpoint_path
328328
else:
329-
unpack_tarball(nemo_checkpoint_path, tmp_dir.name)
330-
nemo_checkpoint_path = tmp_dir.name
329+
raise ValueError("Checkpoint path must be a directory")
331330

332331
if os.path.exists(os.path.join(nemo_checkpoint_path, TOKENIZER_CONFIG_FILE)):
333332
# Instantiate tokenizer for a legacy "Nemo 1" quantized checkpoint from a tokenizer config.

nemo/export/tensorrt_mm_exporter.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@
3232
extract_lora_ckpt,
3333
)
3434
from nemo.export.multimodal.run import MultimodalModelRunner, SpeechllmModelRunner
35-
from nemo.export.tarutils import unpack_tarball
3635

3736
use_deploy = True
3837
try:
@@ -152,8 +151,7 @@ def export(
152151
if os.path.isdir(lora_checkpoint_path):
153152
lora_dir = lora_checkpoint_path
154153
else:
155-
lora_dir = os.path.join(tmp_dir.name, "unpacked_lora")
156-
unpack_tarball(lora_checkpoint_path, lora_dir)
154+
raise ValueError("lora_checkpoint_path in nemo1 is not supported. It must be a directory")
157155

158156
llm_lora_path = [extract_lora_ckpt(lora_dir, tmp_dir.name)]
159157
else:

nemo/export/trt_llm/qnemo/qnemo_to_tensorrt_llm.py

Lines changed: 24 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# limitations under the License.
1414

1515
import glob
16+
import itertools
1617
import os
1718
import subprocess
1819
import warnings
@@ -78,42 +79,40 @@ def qnemo_to_tensorrt_llm(
7879

7980
speculative_decoding_mode = "medusa" if "Medusa" in config.architecture else None
8081

81-
build_cmd = "trtllm-build "
82-
build_cmd += f"--checkpoint_dir {nemo_checkpoint_path} "
83-
build_cmd += f"--log_level {log_level} "
84-
build_cmd += f"--output_dir {engine_dir} "
85-
build_cmd += f"--workers {num_build_workers} "
86-
build_cmd += f"--max_batch_size {max_batch_size} "
87-
build_cmd += f"--max_input_len {max_input_len} "
88-
build_cmd += f"--max_beam_width {max_beam_width} "
89-
build_cmd += f"--max_prompt_embedding_table_size {max_prompt_embedding_table_size} "
90-
build_cmd += f"--paged_kv_cache {'enable' if paged_kv_cache else 'disable'} "
91-
build_cmd += f"--use_paged_context_fmha {'enable' if paged_context_fmha else 'disable'} "
92-
build_cmd += f"--remove_input_padding {'enable' if remove_input_padding else 'disable'} "
93-
build_cmd += f"--multiple_profiles {'enable' if multiple_profiles else 'disable'} "
94-
build_cmd += f"--reduce_fusion {'enable' if reduce_fusion else 'disable'} "
95-
build_cmd += f"--use_fused_mlp {'enable' if use_fused_mlp else 'disable'} "
82+
build_cmd = ["trtllm-build"]
83+
build_cmd.extend(["--checkpoint_dir", nemo_checkpoint_path])
84+
build_cmd.extend(["--log_level", log_level])
85+
build_cmd.extend(["--output_dir", engine_dir])
86+
build_cmd.extend(["--workers", str(num_build_workers)])
87+
build_cmd.extend(["--max_batch_size", str(max_batch_size)])
88+
build_cmd.extend(["--max_input_len", str(max_input_len)])
89+
build_cmd.extend(["--max_beam_width", str(max_beam_width)])
90+
build_cmd.extend(["--max_prompt_embedding_table_size", str(max_prompt_embedding_table_size)])
91+
build_cmd.extend(["--paged_kv_cache", "enable" if paged_kv_cache else "disable"])
92+
build_cmd.extend(["--use_paged_context_fmha", "enable" if paged_context_fmha else "disable"])
93+
build_cmd.extend(["--remove_input_padding", "enable" if remove_input_padding else "disable"])
94+
build_cmd.extend(["--multiple_profiles", "enable" if multiple_profiles else "disable"])
95+
build_cmd.extend(["--reduce_fusion", "enable" if reduce_fusion else "disable"])
96+
build_cmd.extend(["--use_fused_mlp", "enable" if use_fused_mlp else "disable"])
9697

9798
if not use_qdq:
98-
build_cmd += "--gemm_plugin auto "
99+
build_cmd.extend(["--gemm_plugin", "auto"])
99100

100101
if max_seq_len is not None:
101-
build_cmd += f"--max_seq_len {max_seq_len} "
102+
build_cmd.extend(["--max_seq_len", str(max_seq_len)])
102103

103104
if max_num_tokens is not None:
104-
build_cmd += f"--max_num_tokens {max_num_tokens} "
105+
build_cmd.extend(["--max_num_tokens", str(max_num_tokens)])
105106
else:
106-
build_cmd += f"--max_num_tokens {max_batch_size * max_input_len} "
107+
build_cmd.extend(["--max_num_tokens", str(max_batch_size * max_input_len)])
107108

108109
if opt_num_tokens is not None:
109-
build_cmd += f"--opt_num_tokens {opt_num_tokens} "
110+
build_cmd.extend(["--opt_num_tokens", str(opt_num_tokens)])
110111

111112
if speculative_decoding_mode:
112-
build_cmd += f"--speculative_decoding_mode {speculative_decoding_mode} "
113-
114-
build_cmd = build_cmd.replace("--", "\\\n --") # Separate parameters line by line
113+
build_cmd.extend(["--speculative_decoding_mode", speculative_decoding_mode])
115114

116115
print("trtllm-build command:")
117-
print(build_cmd)
116+
print("".join(itertools.chain.from_iterable(zip(build_cmd, itertools.cycle(["\n ", " "])))).strip())
118117

119-
subprocess.run(build_cmd, shell=True, check=True)
118+
subprocess.run(build_cmd, shell=False, check=True)

0 commit comments

Comments (0)