From 75136e4b56422d2556b88efa377500dd54202c2c Mon Sep 17 00:00:00 2001
From: Jonas Kuebler
Date: Mon, 15 Sep 2025 06:18:02 +0000
Subject: [PATCH 1/2] optimize fp8 tile sizes for headdim 64 for faster fp8 decoding

Signed-off-by: Jonas Kuebler
---
 hopper/tile_size.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/hopper/tile_size.h b/hopper/tile_size.h
index d63999c638..aae4ed3ec8 100644
--- a/hopper/tile_size.h
+++ b/hopper/tile_size.h
@@ -52,7 +52,10 @@ constexpr std::tuple tile_size_fwd_sm90(
         }
     } else {
         if (headdim <= 64) {
-            return {192, 160, true, true};
+            if (use_one_mma_wg) {
+                return {64, 128, true, true};
+            } else {
+                return {192, 160, true, true};
         } else if (headdim <= 96) {
             return {192, 128, true, true};
         } else if (headdim <= 128) {

From 90a6e1df27cc8e52e01fdc6020ec0bb6a1275272 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20M=2E=20K=C3=BCbler?= <44084297+jmkuebler@users.noreply.github.com>
Date: Mon, 15 Sep 2025 22:12:55 +0200
Subject: [PATCH 2/2] fix syntax error

Signed-off-by: Jonas Kuebler
---
 hopper/tile_size.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hopper/tile_size.h b/hopper/tile_size.h
index aae4ed3ec8..24c76b84c2 100644
--- a/hopper/tile_size.h
+++ b/hopper/tile_size.h
@@ -56,6 +56,7 @@ constexpr std::tuple tile_size_fwd_sm90(
                 return {64, 128, true, true};
             } else {
                 return {192, 160, true, true};
+            }
         } else if (headdim <= 96) {
             return {192, 128, true, true};
         } else if (headdim <= 128) {