diff --git a/csrc/nv_internal/tensorrt_llm/kernels/communicationKernels/moeAlltoAllKernels.cu b/csrc/nv_internal/tensorrt_llm/kernels/communicationKernels/moeAlltoAllKernels.cu index db49a42f1f..e524f7efe8 100644 --- a/csrc/nv_internal/tensorrt_llm/kernels/communicationKernels/moeAlltoAllKernels.cu +++ b/csrc/nv_internal/tensorrt_llm/kernels/communicationKernels/moeAlltoAllKernels.cu @@ -762,7 +762,11 @@ __global__ void moeA2ACombineKernel( return; } } +#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900)) asm volatile("fence.acquire.sys;"); +#else + __threadfence_system(); +#endif } __syncthreads(); #endif