python/paddle/nn/functional/flash_attention.py (3 changes: 2 additions & 1 deletion)

@@ -15,6 +15,7 @@
import paddle
import paddle.nn.functional as F
from paddle import _C_ops, in_dynamic_mode
+ from paddle.base.framework import in_dynamic_or_pir_mode
from paddle.base.layer_helper import LayerHelper
from paddle.base.wrapped_decorator import signature_safe_contextmanager

@@ -221,7 +222,7 @@ def flash_attention(
sdp_func_name = _select_sdp(head_dim)

if sdp_func_name == "flash_attn":
- if in_dynamic_mode():
+ if in_dynamic_or_pir_mode():
(
result_attention,
result_softmax,
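Note: this change widens the fast-path guard so the C++ flash-attention kernel is used under the new PIR program representation as well as in dynamic (eager) mode. A minimal sketch of how such a mode guard behaves; the branch messages are illustrative, not Paddle's real control flow:

```python
import paddle
from paddle.base.framework import in_dynamic_or_pir_mode

def describe_dispatch():
    # True in eager mode, and also while building a PIR program; only the
    # legacy (old-IR) static graph takes the LayerHelper branch. Which branch
    # paddle.enable_static() lands in depends on the Paddle version and
    # whether the PIR API flag is on.
    if in_dynamic_or_pir_mode():
        return "dynamic/PIR: call the C++ op (_C_ops.flash_attn) directly"
    return "legacy static graph: build the op via LayerHelper"

print(describe_dispatch())  # eager mode -> "dynamic/PIR: ..."
paddle.enable_static()
print(describe_dispatch())
```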
test/distribution/test_dirichlet_op.py (15 changes: 12 additions & 3 deletions)

@@ -33,13 +33,16 @@ def setUp(self):
self.op_type = "dirichlet"
self.alpha = np.array((1.0, 2.0))
Contributor:

python_api needs to be a callable object whose invocation returns the result. For example, here self.python_api = paddle.distribution.Dirichlet would, when called, merely instantiate a Dirichlet object; whereas with self.python_api = paddle.distribution.Dirichlet(...).sample, calling self.python_api actually returns a result. The output of this unit test is compared against self._hypothesis_testing, so self.python_api needs to be set to match the kind of computation that self._hypothesis_testing checks.
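To make the distinction concrete, here is a small illustration of the two bindings being contrasted (the shapes and values are made up for the example):

```python
import paddle

alpha = paddle.to_tensor([1.0, 2.0])

# Stores a Tensor (the already-computed samples), not something callable:
# .sample(...) runs immediately, so there is nothing left for the test
# harness to invoke later.
already_a_result = paddle.distribution.Dirichlet(alpha).sample((100,))

# Stores the bound method itself; the harness can call it to get a result.
sample_api = paddle.distribution.Dirichlet(alpha).sample
result = sample_api((100,))  # shape [100, 2], one simplex point per row

print(callable(already_a_result), callable(sample_api))  # False True
```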

Contributor:

Let's skip adapting this unit test for now; you can add an explanation in the PR description. I haven't yet thought of a good way to add this self.python_api.

self.sample_shape = (100000, 2)
+ self.python_api = paddle.distribution.Dirichlet(self.alpha).sample(
+     self.sample_shape
+ )

self.inputs = {'Alpha': np.broadcast_to(self.alpha, self.sample_shape)}
self.attrs = {}
self.outputs = {'Out': np.zeros(self.sample_shape)}

def test_check_output(self):
- self.check_output_customized(self._hypothesis_testing)
+ self.check_output_customized(self._hypothesis_testing, check_pir=True)

def _hypothesis_testing(self, outs):
self.assertEqual(outs[0].shape, self.sample_shape)
@@ -63,6 +66,9 @@ def setUp(self):
self.alpha = np.array((1.0, 2.0))
self.sample_shape = (100000, 2)
self.dtype = np.float16
+ self.python_api = paddle.distribution.Dirichlet(self.alpha).sample(
+     self.sample_shape
+ )

self.inputs = {
'Alpha': np.broadcast_to(self.alpha, self.sample_shape).astype(
@@ -73,7 +79,7 @@
self.outputs = {'Out': np.zeros(self.sample_shape).astype(self.dtype)}

def test_check_output(self):
- self.check_output_customized(self._hypothesis_testing)
+ self.check_output_customized(self._hypothesis_testing, check_pir=True)

def _hypothesis_testing(self, outs):
self.assertEqual(outs[0].shape, self.sample_shape)
@@ -103,6 +109,9 @@ def setUp(self):
self.sample_shape = (10000, 2)
self.dtype = np.uint16
self.np_dtype = np.float32
+ self.python_api = paddle.distribution.Dirichlet(self.alpha).sample(
+     self.sample_shape
+ )
Contributor:

Suggested change (remove these lines):

- self.python_api = paddle.distribution.Dirichlet(self.alpha).sample(
-     self.sample_shape
- )

First revert the changes to test/distribution/test_dirichlet_op.py, and note in the PR description that this unit test has not been adapted.


self.inputs = {
'Alpha': np.broadcast_to(self.alpha, self.sample_shape).astype(
@@ -119,7 +128,7 @@

def test_check_output(self):
self.check_output_with_place_customized(
-     self._hypothesis_testing, place=core.CUDAPlace(0)
+     self._hypothesis_testing, place=core.CUDAPlace(0), check_pir=True
)

def _hypothesis_testing(self, outs):
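The _hypothesis_testing checker shown truncated above validates the sampler statistically rather than elementwise. As a hedged sketch of the idea (not the test file's actual code): for a two-component Dirichlet(a1, a2), the first component is marginally Beta(a1, a2), so a one-sample Kolmogorov-Smirnov test against that Beta CDF is enough to sanity-check the samples:

```python
import numpy as np
import scipy.stats

def hypothesis_check_sketch(samples, alpha=(1.0, 2.0), significance=0.01):
    # samples: array of shape (n, 2) drawn from Dirichlet(alpha).
    # Marginally, samples[:, 0] ~ Beta(alpha[0], alpha[1]); KS-test it.
    _, p_value = scipy.stats.kstest(
        samples[:, 0], scipy.stats.beta(alpha[0], alpha[1]).cdf
    )
    return p_value > significance  # True -> no evidence the sampler is wrong
```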
test/distribution/test_distribution_dirichlet_static.py (6 changes: 6 additions & 0 deletions)

@@ -20,6 +20,7 @@
from parameterize import TEST_CASE_NAME, parameterize_cls, place

import paddle
+ from paddle.pir_utils import test_with_pir_api

np.random.seed(2022)
paddle.enable_static()
@@ -41,6 +42,7 @@ def setUp(self):
self._paddle_diric = paddle.distribution.Dirichlet(conc)
self.feeds = {'conc': self.concentration}

+ @test_with_pir_api
def test_mean(self):
with paddle.static.program_guard(self.program):
[out] = self.executor.run(
@@ -55,6 +57,7 @@ def test_mean(self):
atol=ATOL.get(str(self.concentration.dtype)),
)

+ @test_with_pir_api
Contributor:

Same as above; this needs to be changed.

def test_variance(self):
with paddle.static.program_guard(self.program):
[out] = self.executor.run(
@@ -69,6 +72,7 @@ def test_variance(self):
atol=ATOL.get(str(self.concentration.dtype)),
)

+ @test_with_pir_api
Contributor:

Same as above; this needs to be changed.

def test_prob(self):
with paddle.static.program_guard(self.program):
random_number = np.random.rand(*self.concentration.shape)
@@ -88,6 +92,7 @@ def test_prob(self):
atol=ATOL.get(str(self.concentration.dtype)),
)

+ @test_with_pir_api
Contributor:

Same as above; this needs to be changed.

def test_log_prob(self):
with paddle.static.program_guard(self.program):
random_number = np.random.rand(*self.concentration.shape)
@@ -107,6 +112,7 @@ def test_log_prob(self):
atol=ATOL.get(str(self.concentration.dtype)),
)

+ @test_with_pir_api
Contributor:

Same as above; this needs to be changed.

def test_entropy(self):
with paddle.static.program_guard(self.program):
[out] = self.executor.run(
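The @test_with_pir_api decorator added throughout this file runs a static-graph test under both the legacy IR and the new PIR program representation. As a rough mental model only (a hypothetical sketch, not the actual implementation in paddle.pir_utils):

```python
import functools
import paddle

def test_with_pir_api_sketch(func):
    # Hypothetical sketch: execute the wrapped test twice, once as a legacy
    # static-graph program and once with PIR switched on.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        func(*args, **kwargs)              # legacy IR pass
        with paddle.pir_utils.IrGuard():   # assumes IrGuard toggles PIR mode
            func(*args, **kwargs)          # PIR pass
    return wrapper
```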
test/legacy_test/test_activation_op.py (7 changes: 4 additions & 3 deletions)

Contributor:

This file also contains the TestELU unit test, and the bf16 and fp16 TestELU tests created by create_test_act_bf16_class and create_test_act_fp16_class have been missed. Please adapt them as well.
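For readers unfamiliar with these helpers: create_test_act_fp16_class and create_test_act_bf16_class are factories in test_activation_op.py that synthesize a low-precision variant of an existing activation test class. A hypothetical sketch of the pattern (names and details simplified, not the file's exact code):

```python
import numpy as np

def create_test_act_fp16_class_sketch(parent, check_pir=False):
    # Derive a subclass that reruns the parent activation test in float16,
    # forwarding flags such as check_pir to the output checker.
    class TestActFp16(parent):
        def init_dtype(self):
            self.dtype = np.float16

        def test_check_output(self):
            self.check_output(atol=1e-3, check_pir=check_pir)

    TestActFp16.__name__ = parent.__name__ + "Fp16"
    globals()[TestActFp16.__name__] = TestActFp16
```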

@@ -3170,7 +3170,7 @@ def init_shape(self):
def test_check_grad(self):
if self.dtype == np.float16:
return
- self.check_grad(['X'], 'Out')
+ self.check_grad(['X'], 'Out', check_pir=True)

def get_alpha(self):
return 1.0
@@ -3201,6 +3201,7 @@ def setUp(self):
def executed_api(self):
self.elu = F.elu

+ @test_with_pir_api
def test_static_api(self):
with static_guard():
with paddle.static.program_guard(paddle.static.Program()):
@@ -4876,7 +4877,7 @@ def test_check_grad(self):
create_test_act_fp16_class(TestBRelu, check_pir=True)
create_test_act_fp16_class(TestRelu6)
create_test_act_fp16_class(TestSoftRelu, check_dygraph=False)
- create_test_act_fp16_class(TestELU)
+ create_test_act_fp16_class(TestELU, check_pir=True)
create_test_act_fp16_class(TestCELU)
create_test_act_fp16_class(TestReciprocal, check_pir=True)
create_test_act_fp16_class(TestLog, check_prim=True, check_pir=True)
@@ -5031,7 +5032,7 @@ def test_check_grad(self):
create_test_act_bf16_class(TestBRelu, check_pir=True)
create_test_act_bf16_class(TestRelu6)
create_test_act_bf16_class(TestSoftRelu, check_dygraph=False)
- create_test_act_bf16_class(TestELU)
+ create_test_act_bf16_class(TestELU, check_pir=True)
create_test_act_bf16_class(TestCELU)
create_test_act_bf16_class(TestReciprocal, check_pir=True)
create_test_act_bf16_class(TestLog, check_prim=True, check_pir=True)
test/legacy_test/test_eigvals_op.py (2 changes: 2 additions & 0 deletions)

@@ -19,6 +19,7 @@

import paddle
from paddle.base import core
+ from paddle.pir_utils import test_with_pir_api

np.set_printoptions(threshold=np.inf)

@@ -271,6 +272,7 @@ def run_dygraph(self, place):
np_outs = np_eigvals(self.batch_input)
self.verify_output(paddle_outs, np_outs)

+ @test_with_pir_api
def run_static(self, place):
paddle.enable_static()
with paddle.static.program_guard(
test/legacy_test/test_flash_attention.py (34 changes: 26 additions & 8 deletions)

@@ -27,6 +27,7 @@
flash_attn_unpadded,
scaled_dot_product_attention,
)
+ from paddle.pir_utils import test_with_pir_api


def get_cuda_version():
@@ -187,15 +188,10 @@ def test_unpadded(self):
fetch_list=[outs],
)

- np.testing.assert_allclose(
-     fetches_result[0], out_, rtol=5e-03, atol=1e-03
- )

- def test_all(self):
+ def test_dynamic_all(self):
print(
f"Test case shape {self.shape} dtype {self.dtype} causal {self.causal}"
f"Test dynamic case shape {self.shape} dtype {self.dtype} causal {self.causal}"
)
- # test dynamic
paddle.disable_static()

query = np.random.random(self.shape)
@@ -255,9 +251,31 @@ def test_all(self):
q.grad.numpy(), q_.grad.numpy(), rtol=5e-03, atol=1e-03
)

- # test static
+ @test_with_pir_api
+ def test_static_all(self):
Contributor:

This needs to be decorated with @test_with_pir_api.

+ print(
+     f"Test static case shape {self.shape} dtype {self.dtype} causal {self.causal}"
+ )
+ paddle.enable_static()

+ query = np.random.random(self.shape)
+ key = np.random.random(self.shape)
+ value = np.random.random(self.shape)

+ q_ = paddle.to_tensor(
+     query, place=self.place, dtype=self.dtype, stop_gradient=False
+ )
+ k_ = paddle.to_tensor(
+     key, place=self.place, dtype=self.dtype, stop_gradient=False
+ )
+ v_ = paddle.to_tensor(
+     value, place=self.place, dtype=self.dtype, stop_gradient=False
+ )

+ out_ = attention_naive(q_, k_, v_, self.causal)

+ out_.backward()

with paddle.static.program_guard(paddle.static.Program()):
qs = paddle.static.data(
name="q", shape=self.shape, dtype=self.dtype
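attention_naive, used above as the reference, presumably computes plain scaled dot-product attention with dense matmuls. A hedged sketch under that assumption (layout assumed to be [batch, seq_len, num_heads, head_dim], matching flash_attention's documented layout):

```python
import paddle

def attention_naive_sketch(q, k, v, causal=False):
    # Hypothetical reference implementation used to validate flash_attn:
    # softmax(Q K^T / sqrt(d)) V, computed with dense matmuls.
    qt = paddle.transpose(q, [0, 2, 1, 3])  # -> [b, h, s, d]
    kt = paddle.transpose(k, [0, 2, 1, 3])
    vt = paddle.transpose(v, [0, 2, 1, 3])
    scale = 1.0 / (q.shape[-1] ** 0.5)
    scores = paddle.matmul(qt, kt, transpose_y=True) * scale  # [b, h, s, s]
    if causal:
        seq = scores.shape[-1]
        # Mask out future positions with -inf above the diagonal.
        mask = paddle.triu(
            paddle.full([seq, seq], float('-inf'), dtype=scores.dtype),
            diagonal=1,
        )
        scores = scores + mask
    probs = paddle.nn.functional.softmax(scores, axis=-1)
    out = paddle.matmul(probs, vt)           # [b, h, s, d]
    return paddle.transpose(out, [0, 2, 1, 3])  # back to [b, s, h, d]
```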