PaddlePaddle · yuanlehome · Apr 8, 2024 · Apr 8, 2024 · Apr 8, 2024
diff --git a/paddle/fluid/pir/transforms/gpu/fused_flash_attn_pass.cc b/paddle/fluid/pir/transforms/gpu/fused_flash_attn_pass.cc
@@ -434,15 +434,26 @@ class TransposeSliceFlashAttnPattern : public paddle::drr::DrrPatternBase {
     // Result Pattern.
     //
     paddle::drr::ResultPattern res = src.ResultPattern();
+    // [b, head, seq_len, head_dim] -> [b, seq_len, head, head_dim]
+    const auto &q_transpose = res.Op(
+        "pd_op.transpose", {{"perm", res.VectorInt32Attr({0, 2, 1, 3})}});
+    res.Tensor("q_transpose") = q_transpose(res.Tensor("q"));
+    const auto &k_transpose = res.Op(
+        "pd_op.transpose", {{"perm", res.VectorInt32Attr({0, 2, 1, 3})}});
+    res.Tensor("k_transpose") = k_transpose(res.Tensor("k"));
+    const auto &v_transpose = res.Op(
+        "pd_op.transpose", {{"perm", res.VectorInt32Attr({0, 2, 1, 3})}});
+    res.Tensor("v_transpose") = v_transpose(res.Tensor("v"));
+
     const auto &flash_attn = res.Op("pd_op.flash_attn",
                                     {{{"dropout", res.Float32Attr(0.0)},
                                       {"causal", res.BoolAttr(false)},
                                       {"return_softmax", res.BoolAttr(false)},
                                       {"is_test", res.BoolAttr(true)},
                                       {"rng_name", res.StrAttr("")}}});
-    flash_attn({&res.Tensor("q"),
-                &res.Tensor("k"),
-                &res.Tensor("v"),
+    flash_attn({&res.Tensor("q_transpose"),
+                &res.Tensor("k_transpose"),
+                &res.Tensor("v_transpose"),
                 &res.InputNoneTensor(),
                 &res.Tensor("mask")},
                {&res.Tensor("out"),

diff --git a/test/ir/pir/fused_pass/test_fused_flash_attn_pass.py b/test/ir/pir/fused_pass/test_fused_flash_attn_pass.py
@@ -544,41 +544,43 @@ def test_check_output(self):
 )
 class TestTransposeSliceFlashAttnPattern(PassTest):
     r"""
-                transpose
+                 transpose
+                     |
+          -----------+----------
+          |          |           |
+        slice       slice      slice
+          |          |           |
+          Q          K           V
+          |          |           |
+          |       transpose      |
+          |          |           |
+          -- matmul--            |
+               |                 |
+             scale               |
+               |                 |
+     mask --- add                |
+               |                 |
+            softmax              |
+               |                 |
+               ------matmul------
+                       |
+                   transpose
+                       |
+                      out
+
+            transpose
+                |
+          ------+------
+          |     |     |
+        slice slice slice
+          |     |     |
+          Q     K     V              mask
+          |     |     |               |
+    transpose transpose transpose     |
+          |     |     |               |
+          -------flash_attn------------
                     |
-         -----------+----------
-         |          |           |
-       slice       slice      slice
-         |          |           |
-         Q          K           V
-         |          |           |
-         |       transpose      |
-         |          |           |
-         -- matmul--            |
-              |                 |
-            scale               |
-              |                 |
-    mask --- add                |
-              |                 |
-           softmax              |
-              |                 |
-              ------matmul------
-                      |
-                  transpose
-                      |
-                     out
-
-           transpose
-               |
-         ------+------
-         |     |     |
-       slice slice slice
-         |     |     |
-         Q     K     V       mask
-         |     |     |        |
-         ------flash_attn------
-                   |
-                  out
+                   out
     """
 
     def is_program_valid(self, program=None):