Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
465856c
init
nv-yunzheq Jan 21, 2026
0b78ac9
add init in tests
nv-yunzheq Jan 21, 2026
3b6e0e5
consolidate test
nv-yunzheq Jan 21, 2026
00b48b0
fix autotuner
nv-yunzheq Jan 22, 2026
d98527b
fix CPU overhead
nv-yunzheq Jan 22, 2026
278406d
update benchmark script and unit test
nv-yunzheq Jan 23, 2026
3adbd7d
update benchmark script
nv-yunzheq Jan 26, 2026
421890a
Merge branch 'main' into cuteDSL_moe
nv-yunzheq Jan 26, 2026
f351efe
fix cuda graph compatibility
nv-yunzheq Jan 26, 2026
0993f52
update test script
nv-yunzheq Jan 26, 2026
ec2f14d
update benchmark and unit test
nv-yunzheq Jan 26, 2026
34f36d9
fix incorrect cuda graph capture issue
nv-yunzheq Jan 27, 2026
dccd29d
update benchmark script
nv-yunzheq Jan 27, 2026
2369e84
udpate tflops calculation
nv-yunzheq Jan 27, 2026
770338e
to only support sm100f
nv-yunzheq Jan 27, 2026
f5c3544
fix unbounded cache error
nv-yunzheq Jan 27, 2026
e7e7a83
fix cache kery error
nv-yunzheq Jan 27, 2026
5da8dca
add reference check for EP
nv-yunzheq Jan 27, 2026
99975ba
add api decorator for wrapper API
nv-yunzheq Jan 27, 2026
e78c152
update API export
nv-yunzheq Jan 28, 2026
a7493a2
rename moe_utils to cute_dsl_moe_utils
nv-yunzheq Jan 28, 2026
fe899fa
Merge branch 'main' into cuteDSL_moe
aleozlx Jan 30, 2026
71a318a
updat e __init__.py
nv-yunzheq Jan 31, 2026
9482890
Merge remote-tracking branch 'origin/main' into cuteDSL_moe
yzh119 Feb 1, 2026
a5b320a
Skip trtllm_alltoall tests on Thor (#2448)
dierksen Feb 1, 2026
1217a94
Fix argument type error in _cudnn_gemm_fp4_requirement (#2450)
Kangyan-Zhou Feb 2, 2026
bde23e5
fix: set_log_level now properly sets logger level to enable DEBUG log…
kahyunnam Feb 2, 2026
c8cbaa5
bugfix: fix stub generation directory in fused_moe module (#2445)
yzh119 Feb 2, 2026
e70c162
update test and benchmark
nv-yunzheq Feb 2, 2026
2cff527
move computation to CPU to avoid cublas layout issue
nv-yunzheq Feb 2, 2026
5a8c23f
Merge remote-tracking branch 'origin/main' into cuteDSL_moe
yzh119 Feb 3, 2026
e475fbb
Merge branch 'main' into cuteDSL_moe
nv-yunzheq Feb 4, 2026
0c2582d
merge main
nv-yunzheq Feb 6, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,074 changes: 1,074 additions & 0 deletions benchmarks/bench_moe_deepseek.py

Large diffs are not rendered by default.

346 changes: 346 additions & 0 deletions csrc/moe_utils_binding.cu

Large diffs are not rendered by default.

59 changes: 59 additions & 0 deletions csrc/nv_internal/include/tensorrt_llm/common/config.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/*
* Copyright (c) 2022-2025, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once
#ifndef TRTLLM_CONFIG_H
#define TRTLLM_CONFIG_H

/**
* \def TRTLLM_ABI_NAMESPACE
* Name of the inline namespace that carries the ABI version.
* The default TRTLLM_ABI_NAMESPACE_BEGIN in this header expands this name into
* `inline namespace <name> {`; this macro itself does not open any block.
* This macro can be overridden to change the ABI version: define it before this
* header is included and the definition below is skipped.
* The default ABI version is _v1.
*/
#ifndef TRTLLM_ABI_NAMESPACE
#define TRTLLM_ABI_NAMESPACE _v1
#endif

/**
* \def TRTLLM_ABI_NAMESPACE_BEGIN
* Opens the inline namespace block whose name is given by TRTLLM_ABI_NAMESPACE.
* Only defined here when no earlier definition exists, so a definition made
* before this header is included takes precedence.
*/
#ifndef TRTLLM_ABI_NAMESPACE_BEGIN
#define TRTLLM_ABI_NAMESPACE_BEGIN inline namespace TRTLLM_ABI_NAMESPACE {
#endif

/**
* \def TRTLLM_ABI_NAMESPACE_END
* Closes the block opened by TRTLLM_ABI_NAMESPACE_BEGIN with a single `}`.
* Only defined here when no earlier definition exists; an override of either
* macro of the pair must keep the braces balanced.
*/
#ifndef TRTLLM_ABI_NAMESPACE_END
#define TRTLLM_ABI_NAMESPACE_END }
#endif

/**
* \def TRTLLM_NAMESPACE_BEGIN
* This macro is used to open a `tensorrt_llm::` namespace block and, nested
* directly inside it, the ABI namespace block opened by
* TRTLLM_ABI_NAMESPACE_BEGIN. Every use must be paired with a
* TRTLLM_NAMESPACE_END so that both blocks are closed.
* This macro is defined by TensorRT-LLM and may not be overridden (it is
* defined unconditionally, without an #ifndef guard).
* NOTE(review): earlier wording referred to enclosing namespaces requested by
* TRTLLM_WRAPPED_NAMESPACE; no such macro is consulted in this header --
* confirm whether that support is intended.
*/
#define TRTLLM_NAMESPACE_BEGIN \
namespace tensorrt_llm { \
TRTLLM_ABI_NAMESPACE_BEGIN

/**
* \def TRTLLM_NAMESPACE_END
* This macro is used to close a block opened by TRTLLM_NAMESPACE_BEGIN: it
* first closes the ABI namespace block via TRTLLM_ABI_NAMESPACE_END and then
* closes the enclosing `tensorrt_llm::` namespace block.
* This macro is defined by TensorRT-LLM and may not be overridden (it is
* defined unconditionally, without an #ifndef guard).
* NOTE(review): earlier wording referred to enclosing namespaces requested by
* TRTLLM_WRAPPED_NAMESPACE; no such macro is consulted in this header --
* confirm whether that support is intended.
*/
#define TRTLLM_NAMESPACE_END \
TRTLLM_ABI_NAMESPACE_END \
} /* end namespace tensorrt_llm */

#endif // TRTLLM_CONFIG_H
Loading
Loading