We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 66233af · commit f78c0be · Copy full SHA for f78c0be
benchmarks/kernels/benchmark_moe.py
@@ -2,6 +2,7 @@
2
3
import argparse
4
import time
5
+from contextlib import nullcontext
6
from datetime import datetime
7
from itertools import product
8
from typing import Any, TypedDict
@@ -412,7 +413,8 @@ def tune(
412
413
hidden_size, search_space,
414
is_fp16, topk)
415
- with torch.cuda.device(self.device_id):
416
+ with torch.cuda.device(self.device_id) if current_platform.is_rocm(
417
+ ) else nullcontext():
418
for config in tqdm(search_space):
419
try:
420
kernel_time = benchmark_config(
0 commit comments