4 changes: 2 additions & 2 deletions .github/workflows/install-test.yml
@@ -101,7 +101,7 @@ jobs:
     runs-on: linux-amd64-cpu16
     name: UV - AMD64/Linux - NGC ${{ contains(matrix.image, 'cuda') && 'CUDA' || 'PyTorch' }}
     container:
-      image: ubuntu:24.04
+      image: nvcr.io/nvidia/pytorch:25.05-py3
     environment: nemo-ci
     steps:
       - name: Checkout repository
@@ -128,7 +128,7 @@
           FRAMEWORK=("--inference-framework" "inframework")
         fi
 
-        bash docker/common/install.sh --base-image ubuntu --use-uv --python-version 3.12 "${FRAMEWORK[@]}"
+        bash docker/common/install.sh --base-image pytorch --use-uv --python-version 3.12 "${FRAMEWORK[@]}"
         uv run python -m ensurepip --upgrade
         ln -sf /opt/venv/bin/pip3 /opt/venv/bin/pip
         uv pip install --no-deps -e .
4 changes: 4 additions & 0 deletions docker/common/install.sh
@@ -139,6 +139,8 @@ main() {
         --all-groups ${UV_ARGS[@]}
     # Install the package
     uv pip install --no-deps -e .
+
+    patch -p1 $(uv run python -c "import triton; print(triton.__path__[0])")/runtime/autotuner.py external/patches/triton-lang_triton_6570_lazy_init.patch
   else
     if [[ "$INFERENCE_FRAMEWORK" != "inframework" ]]; then
       EXTRA="[$INFERENCE_FRAMEWORK]"
@@ -154,6 +156,8 @@


     pip install --pre --no-cache-dir --no-build-isolation .$EXTRA
+
+    patch -p1 $(python -c "import triton; print(triton.__path__[0])")/runtime/autotuner.py external/patches/triton-lang_triton_6570_lazy_init.patch
   fi
 
 }
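
Both install paths apply the Triton fix in place: the shell substitutes Triton's installed package directory (via `triton.__path__`) into the `patch` target, so the patch lands wherever Triton resolved in that environment. A quick post-install sanity check along these lines (illustrative, not part of this PR) confirms the patched autotuner defers benchmarker selection:

```python
# Illustrative check, not part of this PR: after install.sh runs, the patched
# Autotuner should expose do_bench as a functools.cached_property descriptor
# instead of assigning it eagerly in __init__.
import inspect
from functools import cached_property

from triton.runtime.autotuner import Autotuner

# getattr_static reads the class attribute without triggering the descriptor.
attr = inspect.getattr_static(Autotuner, "do_bench")
assert isinstance(attr, cached_property), "Triton autotuner patch not applied"
print("Triton autotuner patch applied: do_bench is lazily initialized")
```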
42 changes: 19 additions & 23 deletions external/patches/triton-lang_triton_6570_lazy_init.patch
@@ -1,32 +1,31 @@
-From 7240b92457a723a3a3ec2292e40df6274382524c Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?oliver=20k=C3=B6nig?= <[email protected]>
-Date: Wed, 11 Jun 2025 19:13:30 +0000
-Subject: [PATCH] f
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
+/*
+ * Code imported via patch from https://github.com/triton-lang/triton/pull/6570, commit 2afae45951b74785b144151b31e91e6c82b0b02f.
+ * Copyright (c) 2018-2022 Philippe Tillet, OpenAI.
+ * Licensed under the MIT License.
+ */
 
-Signed-off-by: oliver könig <[email protected]>
----
- external/patches/main.py | 15 +++++++++------
- 1 file changed, 9 insertions(+), 6 deletions(-)
+From 2afae45951b74785b144151b31e91e6c82b0b02f Mon Sep 17 00:00:00 2001
+From: Han Zhu <[email protected]>
+Date: Tue, 22 Apr 2025 18:42:23 -0700
+Subject: [PATCH] [autotuner] Lazily initiailize do_bench
 
-diff --git a/usr/local/lib/python3.12/dist-packages/triton/runtime/autotuner.py b/usr/local/lib/python3.12/dist-packages/triton/runtime/autotuner.py
-index 69305dc94..4600542b8 100644
---- a/usr/local/lib/python3.12/dist-packages/triton/runtime/autotuner.py
-+++ b/usr/local/lib/python3.12/dist-packages/triton/runtime/autotuner.py
+---
+diff --git a/a/autotuner.py b/b/autotuner.py
+index 0ee6bea09..b75c5e353 100644
+--- a/a/autotuner.py
++++ b/b/autotuner.py
 @@ -4,6 +4,7 @@ import builtins
  import os
  import time
  import inspect
 +from functools import cached_property
  from typing import Dict, Tuple, List, Optional
 
  from .jit import KernelInterface
 @@ -94,6 +95,7 @@ class Autotuner(KernelInterface):
          while not inspect.isfunction(self.base_fn):
              self.base_fn = self.base_fn.fn
 
 +        self._do_bench = do_bench
          self.num_warmups = warmup
          self.num_reps = rep
@@ -42,7 +41,7 @@ index 69305dc94..4600542b8 100644
                 quantiles=quantiles,
 @@ -115,7 +117,7 @@ class Autotuner(KernelInterface):
             return
 
         import triton.testing
-        self.do_bench = lambda kernel_call, quantiles: triton.testing.do_bench(
+        self._do_bench = lambda kernel_call, quantiles: triton.testing.do_bench(
@@ -52,7 +51,7 @@ index 69305dc94..4600542b8 100644
 @@ -123,10 +125,11 @@ class Autotuner(KernelInterface):
             )
             return
 
-        if do_bench is None:
-            self.do_bench = driver.active.get_benchmarker()
-        else:
@@ -62,9 +61,6 @@ index 69305dc94..4600542b8 100644
+        if self._do_bench is None:
+            return driver.active.get_benchmarker()
+        return self._do_bench
 
     def _bench(self, *args, config, **meta):
         from ..compiler.errors import CompileTimeAssertionFailure
--
2.43.0
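
The substance of the imported commit is a standard lazy-initialization move: rather than calling `driver.active.get_benchmarker()` inside `__init__`, which initializes the GPU driver as soon as an autotuned kernel object is constructed, the constructor now only stores the caller-supplied value, and a `cached_property` resolves the benchmarker on first access. A minimal standalone sketch of the pattern, with illustrative names (`Tuner`, `probe_hardware`) that are not Triton APIs:

```python
# Minimal sketch of the lazy-initialization pattern used by the patch above.
# Tuner and probe_hardware are illustrative stand-ins, not Triton code.
from functools import cached_property


def probe_hardware():
    # Stands in for driver.active.get_benchmarker(): an expensive call that
    # touches the GPU driver and should not run at construction time.
    print("probing hardware...")
    return lambda kernel_call, quantiles: kernel_call()


class Tuner:
    def __init__(self, do_bench=None):
        # Remember only what the caller passed; do no driver work yet.
        self._do_bench = do_bench

    @cached_property
    def do_bench(self):
        # Resolved once on first access, then cached on the instance.
        if self._do_bench is None:
            return probe_hardware()
        return self._do_bench


t = Tuner()   # constructing the tuner triggers no hardware probe
t.do_bench    # "probing hardware..." runs here, on first use
t.do_bench    # cached: the probe does not run a second time
```

Because `cached_property` stores its result in the instance `__dict__`, the public `do_bench` attribute keeps its old read semantics while the driver call moves out of the constructor.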

41 changes: 20 additions & 21 deletions pyproject.toml
@@ -41,41 +41,26 @@ description = "NeMo Export and Deploy - a library to export and deploy LLMs and
requires-python = ">=3.10,<3.13"
license = { text = "Apache 2.0" }
dependencies = [
"megatron-core>=0.14.0a0,<0.15.0",
"megatron-bridge>=0.1.0a0,<0.2.0",
"nvidia-modelopt[torch]>=0.33.0a0,<0.34.0; sys_platform != 'darwin'",
"nvidia-resiliency-ext>=0.4.0a0,<0.5.0; sys_platform != 'darwin'",
"transformer-engine[pytorch]>=2.6.0a0,<2.7.0; sys_platform != 'darwin'",
"accelerate",
"megatron-bridge>=0.2.0a0,<0.3.0",
"megatron-core[mlm,dev]>=0.15.0a0,<0.16.0",
"fastapi",
"pydantic-settings",
"ray",
"ray[serve]",
"uvicorn",
"tensorstore",
"zarr>=2.18.2,<3.0.0",
# Lightning deps
"cloudpickle",
"fiddle",
"hydra-core>1.3,<=1.3.2",
"lightning",
"omegaconf>=2.3.0",
"peft",
"torch==2.7.1",
"torchvision",
"torchmetrics>=0.11.0",
"wandb",
"webdataset>=0.2.86",
"nvidia-pytriton ; platform_system != 'Darwin' ",
"flashinfer-python>=0.2.5 ; platform_system != 'Darwin'",
"Pillow ; platform_system != 'Darwin' and platform_machine != 'aarch64'",
"decord ; platform_system != 'Darwin' and platform_machine != 'aarch64'",
"pyparsing>2.0.2",
"sentencepiece",
"tiktoken",
"einops",
"ijson",
"pyarrow<21.0.0",
"peft",

]

[project.optional-dependencies]
@@ -99,6 +84,12 @@ linting = ["pre-commit>=3.6.0", "ruff~=0.9.0"]
 test = ["pytest", "pytest-mock", "coverage", "click"]
 nemo-toolkit = [
     "nemo-toolkit[automodel,common-only,nlp-only,eval,multimodal-only]>=2.5.0a0,<2.6.0",
+    # Lightning deps
+    "cloudpickle",
+    "fiddle",
+    "hydra-core>1.3,<=1.3.2",
+    "lightning",
+    "omegaconf>=2.3.0",
 ]
 nemo-run = ["nemo-run"]

@@ -115,7 +106,14 @@ transformer-engine = { git = "https://github.com/NVIDIA/TransformerEngine.git",

 [tool.uv]
 # Currently, TE must be built with no build-isolation b/c it requires torch
-no-build-isolation-package = ["transformer-engine", "transformer-engine-torch"]
+no-build-isolation-package = [
+    "transformer-engine",
+    "transformer-engine-torch",
+    "flash-attn",
+    "mamba-ssm",
+    "causal-conv1d",
+    "nv-grouped-gemm",
+]
# Always apply the build group since dependencies like TE/mcore/nemo-run require build dependencies
# and this lets us assume they are implicitly installed with a simply `uv sync`. Ideally, we'd
# avoid including these in the default dependency set, but for now it's required.
@@ -128,8 +126,9 @@ link-mode = "copy"
 conflicts = [[{ extra = "trtllm" }, { extra = "vllm" }, { extra = "trt-onnx" }]]
 override-dependencies = [
     "urllib3>1.27.0",
-    "tiktoken>=0.9.0", # because nemo-toolkit and megatron-bridge disagree on tiktoken, we need to pin it here,
+    "tiktoken>=0.9.0", # because nemo-toolkit and megatron-bridge disagree on tiktoken, we need to pin it here,
     "fsspec[http]>=2023.1.0,<=2024.9.0",
+    "megatron-energon[av-decode]>=6.0,<7.dev0", # because nemo-toolkit and megatron-core disagree on megatron-energon, we need to pin it here,
 ]
prerelease = "allow"
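
`override-dependencies` forces a single resolution for packages that the direct dependencies disagree on, so after a sync it is worth confirming the pins actually won. A small illustrative check (not part of this PR) using `importlib.metadata`:

```python
# Illustrative, not part of this PR: confirm the uv overrides resolved to
# versions inside the pinned ranges after `uv sync`.
from importlib.metadata import version

for pkg, pin in [("tiktoken", ">=0.9.0"), ("megatron-energon", ">=6.0,<7")]:
    print(f"{pkg}: installed {version(pkg)} (pinned {pin})")
```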
