We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent a77294e commit 148e877 — Copy full SHA for 148e877
vllm/attention/ops/triton_decode_attention.py
@@ -425,7 +425,7 @@ def _decode_grouped_att_m_fwd(
425
extra_kargs = {}
426
num_stages = 2
427
if is_hip_:
428
- # https://rocm.docs.amd.com/en/docs-6.2.0/how-to/llm-fine-tuning-optimization/optimizing-triton-kernel.html
+ # https://rocm.docs.amd.com/en/latest/how-to/rocm-for-ai/inference-optimization/workload.html#triton-kernel-performance-optimization
429
# https://github.com/triton-lang/triton/blob/main/third_party/amd/backend/compiler.py
430
extra_kargs = {
431
"waves_per_eu": 1,
0 commit comments