PaddlePaddle
diff --git a/paddle/fluid/pir/transforms/fusion/fused_weight_only_linear_pass.cc b/paddle/fluid/pir/transforms/fusion/fused_weight_only_linear_pass.cc
Lines changed: 2 additions & 2 deletions
diff --git a/test/ir/pir/fused_pass/CMakeLists.txt b/test/ir/pir/fused_pass/CMakeLists.txt
Lines changed: 4 additions & 0 deletions
diff --git a/test/ir/pir/fused_pass/pass_test.py b/test/ir/pir/fused_pass/pass_test.py
Lines changed: 53 additions & 23 deletions
diff --git a/test/ir/pir/fused_pass/test_conv2d_add_act_fuse_pass.py b/test/ir/pir/fused_pass/test_conv2d_add_act_fuse_pass.py
Lines changed: 46 additions & 53 deletions
diff --git a/test/ir/pir/fused_pass/test_conv2d_add_fuse_pass.py b/test/ir/pir/fused_pass/test_conv2d_add_fuse_pass.py
Lines changed: 22 additions & 27 deletions
@@ -24,8 +24,8 @@
 namespace {
 
 int getSMVersion() {
-  int sm_version = 80;
-#if defined(PADDLE_WITH_CUDA)
+  int sm_version = -1;
+#if defined(PADDLE_WITH_CUDA) && defined(PADDLE_WITH_CUTLASS)
   sm_version = paddle::platform::GetGPUComputeCapability(
       paddle::platform::GetCurrentDeviceId());
 #else
 
@@ -3,6 +3,10 @@ file(
   RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
   "test_*.py")
 string(REPLACE ".py" "" TEST_INTERP_CASES "${TEST_INTERP_CASES}")
+# When WITH_CUTLASS is off, take test_fused_weight_only_linear_pass out of
+# TEST_INTERP_CASES so the foreach below does not register it.
+if(NOT WITH_CUTLASS)
+  set(CUTLASS_TEST_CASES test_fused_weight_only_linear_pass)
+  list(REMOVE_ITEM TEST_INTERP_CASES ${CUTLASS_TEST_CASES})
+endif()
 
 foreach(target ${TEST_INTERP_CASES})
   py_test_modules(${target} MODULES ${target})
 
@@ -15,6 +15,8 @@
 import abc
 import unittest
 
+import numpy as np
+
 import paddle
 from paddle import pir
 
@@ -27,7 +29,8 @@ def setUpClass(self):
         self.valid_op_map = {}
         self.pass_list = []
         self.pir_program = None
-        self.place_runtime = "cpu"
+        self.places = []
+        self.skip_accuracy_verification = False
 
     def run_pir_pass(self, program):
         if not isinstance(self.pass_list, list):
@@ -36,7 +39,6 @@ def run_pir_pass(self, program):
         pm = pir.PassManager(opt_level=4)
         for pass_name in self.pass_list:
             pm.add_pass(pass_name)
-
         pm.run(program)
         return program
 
@@ -56,34 +58,62 @@ def check_fused_ops(self, program):
                 ),
             )
 
-    @abc.abstractmethod
-    def is_program_valid(self, program=None):
-        """
-        judge the effectiveness of the pir program
-        """
-        raise NotImplementedError
-
     @abc.abstractmethod
     def sample_program(self):
         """
         Generate all pir programs to be checked.

         check_pass_correct iterates over the result and unpacks every item
         as ``(program, need_translate_to_pir)``.
         """
         raise NotImplementedError
 
-    def check_pass_correct(self, atol=1e-5):
+    def run_program(self, executor, startup_program, main_program):
+        """
+        Run ``main_program`` on ``executor`` and return the fetched results.
+
+        Args:
+            executor: object whose ``run`` method executes the program.
+            startup_program: program installed as the startup program while
+                ``main_program`` is run.
+            main_program: program that is executed.
+
+        Returns:
+            The value returned by ``executor.run`` for ``self.fetch_list``
+            when fed with ``self.feeds``.
+        """
+        with paddle.pir_utils.IrGuard():
+            # program_guard expects (main_program, startup_program), the same
+            # order check_pass_correct uses; the two must not be swapped.
+            with paddle.static.program_guard(main_program, startup_program):
+                return executor.run(
+                    main_program,
+                    feed=self.feeds,
+                    fetch_list=self.fetch_list,
+                )
+
+    def compare_accuracy(
+        self, baseline_data, actual_data, atol=1e-5, rtol=1e-5
+    ):
+        """
+        Assert that two sequences of outputs agree element by element.
+
+        Args:
+            baseline_data: reference outputs; every item must expose ``shape``.
+            actual_data: outputs under test, compared pairwise with the
+                reference outputs.
+            atol: absolute tolerance passed to ``np.testing.assert_allclose``.
+            rtol: relative tolerance passed to ``np.testing.assert_allclose``.
+
+        Raises:
+            AssertionError: if the number of outputs differs, if a pair of
+                outputs has different shapes, or if their values are not
+                close within the tolerances.
+        """
         self.assertTrue(
-            self.place_runtime == "cpu" or self.place_runtime == "gpu",
-            "The place param must be either GPU or CPU ",
+            len(baseline_data) == len(actual_data),
+            f"The number of outputs is not equal, the baseline has {len(baseline_data)} outputs, but got {len(actual_data)}",
         )
-        if self.place_runtime == "cpu":
-            executor = paddle.static.Executor(paddle.base.CPUPlace())
-        elif self.place_runtime == "gpu":
-            executor = paddle.static.Executor(paddle.base.CUDAPlace(0))
+        # The lengths were checked above, so zip() pairs up every output.
+        for baseline, actual in zip(baseline_data, actual_data):
+            self.assertEqual(
+                baseline.shape,
+                actual.shape,
+                f"The output shapes are not equal, the baseline shape is {baseline.shape}, but got {actual.shape}",
+            )
+            # assert_allclose takes (actual, desired): the relative tolerance
+            # is scaled by the desired (baseline) value and the failure
+            # report labels the two operands accordingly.
+            np.testing.assert_allclose(
+                actual, baseline, atol=atol, rtol=rtol
+            )
 
-        for program, need_translate_to_pir in self.sample_program():
-            if need_translate_to_pir:
-                program = pir.translate_to_pir(program.desc)
-            if not self.is_program_valid(program):
-                continue
-            program = self.run_pir_pass(program)
-            self.check_fused_ops(program)
+    def check_pass_correct(self, atol=1e-5, rtol=1e-5):
+        """
+        Check the passes in ``self.pass_list`` on every sample program.
+
+        For each place in ``self.places`` and each item yielded by
+        ``sample_program`` this method:
+
+        1. translates the main program to pir when the item asks for it,
+        2. runs the startup program on an executor created for the place,
+        3. runs the main program to obtain the baseline outputs,
+        4. applies the passes and checks the op counts with
+           ``check_fused_ops``,
+        5. runs the rewritten program and, unless
+           ``self.skip_accuracy_verification`` is set, compares its outputs
+           with the baseline.
+
+        Args:
+            atol: absolute tolerance forwarded to ``compare_accuracy``.
+            rtol: relative tolerance forwarded to ``compare_accuracy``.
+        """
+        # NOTE: when ``self.places`` is empty the loop body never runs, so
+        # nothing is checked and the calling test still passes.
+        for place in self.places:
+            for program, need_translate_to_pir in self.sample_program():
+                # Each sample carries the main program first and the startup
+                # program second.
+                main_program, startup_program = program[0], program[1]
+                if need_translate_to_pir:
+                    main_program = pir.translate_to_pir(main_program.desc)
+                with paddle.pir_utils.IrGuard():
+                    with paddle.static.program_guard(
+                        main_program, startup_program
+                    ):
+                        executor = paddle.static.Executor(place)
+                        executor.run(startup_program)
+                # Outputs before the passes are applied.
+                baseline_fetch = self.run_program(
+                    executor, startup_program, main_program
+                )
+                main_program = self.run_pir_pass(main_program)
+                self.check_fused_ops(main_program)
+                # Outputs of the rewritten program.
+                actual_fetch = self.run_program(
+                    executor, startup_program, main_program
+                )
+                if not self.skip_accuracy_verification:
+                    self.compare_accuracy(
+                        baseline_fetch, actual_fetch, atol, rtol
+                    )
@@ -18,14 +18,11 @@
 from pass_test import PassTest
 
 import paddle
+from paddle.base import core
 
 paddle.enable_static()
 
 
-@unittest.skipIf(
-    not paddle.base.core.is_compiled_with_cuda(),
-    "core is not complied with CUDA",
-)
 class TestConv2dAddActFusePattern(PassTest):
     r"""
       x_var   f_var
@@ -47,10 +44,10 @@ def is_program_valid(self, program):
         return True
 
     def build_ir_progam(self):
-        pir_program = None
         with paddle.pir_utils.IrGuard():
-            pir_program = paddle.static.Program()
-            with paddle.pir.core.program_guard(pir_program):
+            main_prog = paddle.static.Program()
+            start_prog = paddle.static.Program()
+            with paddle.pir.core.program_guard(main_prog, start_prog):
                 x = paddle.static.data(
                     name='x', shape=[3, 1, 28, 28], dtype='float32'
                 )
@@ -67,23 +64,26 @@ def build_ir_progam(self):
                 )
                 act_op = paddle.nn.ReLU()
                 out = act_op(paddle.add(conv2d(x), y))
-
-        self.pass_list = ['conv2d_add_act_fuse_pass']
-        self.feeds = {
-            "x": np.random.random((3, 32, 28, 28)).astype("float32"),
-            "y": np.random.random((3, 32, 28, 28)).astype("float32"),
-        }
-        self.fetch_list = [out]
-        self.valid_op_map = {
-            "pd_op.add": 0,
-            "pd_op.relu": 0,
-            "pd_op.conv2d": 0,
-            "pd_op.fused_conv2d_add_act": 1,
-        }
-        return pir_program
+                out = paddle.assign(out)
+                self.pass_list = ['conv2d_add_act_fuse_pass']
+                self.feeds = {
+                    "x": np.random.random((3, 1, 28, 28)).astype("float32"),
+                    "y": np.random.random((3, 32, 28, 28)).astype("float32"),
+                }
+                self.fetch_list = [out]
+                self.valid_op_map = {
+                    "pd_op.add": 0,
+                    "pd_op.relu": 0,
+                    "pd_op.conv2d": 0,
+                    "pd_op.fused_conv2d_add_act": 1,
+                }
+                return [main_prog, start_prog]
 
     def setUp(self):
-        self.place_runtime = "gpu"
+        """
+        Choose the places to run on and turn off the accuracy comparison.
+        """
+        # Start from a fresh list instead of appending to the one created in
+        # setUpClass: appending there on every setUp would queue the same
+        # place once more for each test method that runs.
+        self.places = []
+        if core.is_compiled_with_cuda():
+            self.places.append(paddle.CUDAPlace(0))
+        # TODO(bukejiyu): This pass will support accuracy verification in the future
+        self.skip_accuracy_verification = True
 
     def sample_program(self):
         """
         Yield one sample: the result of build_ir_progam paired with False,
         the flag check_pass_correct reads as need_translate_to_pir.
         """
         yield self.build_ir_progam(), False
@@ -92,15 +92,6 @@ def test_check_output(self):
         self.check_pass_correct()
 
 
-class TestConv2dAddActFusePatternWithCpu(TestConv2dAddActFusePattern):
-    def setUp(self):
-        self.place_runtime = "cpu"
-
-
-@unittest.skipIf(
-    not paddle.base.core.is_compiled_with_cuda(),
-    "core is not complied with CUDA",
-)
 class TestConv2dAdd2ActFusePattern(PassTest):
     r"""
      x_var   f_var(persistable)
@@ -124,10 +115,10 @@ def is_program_valid(self, program):
         return True
 
     def build_ir_progam(self):
-        pir_program = None
         with paddle.pir_utils.IrGuard():
-            pir_program = paddle.static.Program()
-            with paddle.pir.core.program_guard(pir_program):
+            main_prog = paddle.static.Program()
+            start_prog = paddle.static.Program()
+            with paddle.pir.core.program_guard(main_prog, start_prog):
                 x = paddle.static.data(
                     name='x', shape=[3, 1, 28, 28], dtype='float32'
                 )
@@ -149,22 +140,29 @@ def build_ir_progam(self):
                 out = act_op(
                     paddle.add(residual_data, paddle.add(conv2d(x), y))
                 )
-        self.pass_list = ['conv2d_add_act_fuse_pass']
-        self.feeds = {
-            "x": np.random.random((3, 32, 28, 28)).astype("float32"),
-            "y": np.random.random((3, 32, 28, 28)).astype("float32"),
-        }
-        self.fetch_list = [out]
-        self.valid_op_map = {
-            "pd_op.add": 0,
-            "pd_op.relu": 0,
-            "pd_op.conv2d": 0,
-            "pd_op.fused_conv2d_add_act": 1,
-        }
-        return pir_program
+                out = paddle.assign(out)
+                self.pass_list = ['conv2d_add_act_fuse_pass']
+                self.feeds = {
+                    "x": np.random.random((3, 1, 28, 28)).astype("float32"),
+                    "y": np.random.random((3, 32, 28, 28)).astype("float32"),
+                    "residual_data": np.random.random((3, 32, 28, 28)).astype(
+                        "float32"
+                    ),
+                }
+                self.fetch_list = [out]
+                self.valid_op_map = {
+                    "pd_op.add": 0,
+                    "pd_op.relu": 0,
+                    "pd_op.conv2d": 0,
+                    "pd_op.fused_conv2d_add_act": 1,
+                }
+                return [main_prog, start_prog]
 
     def setUp(self):
-        self.place_runtime = "gpu"
+        """
+        Choose the places to run on and turn off the accuracy comparison.
+        """
+        # Start from a fresh list instead of appending to the one created in
+        # setUpClass: appending there on every setUp would queue the same
+        # place once more for each test method that runs.
+        self.places = []
+        if core.is_compiled_with_cuda():
+            self.places.append(paddle.CUDAPlace(0))
+        # TODO(bukejiyu): This pass will support accuracy verification in the future
+        self.skip_accuracy_verification = True
 
     def sample_program(self):
         """
         Yield one sample: the result of build_ir_progam paired with False,
         the flag check_pass_correct reads as need_translate_to_pir.
         """
         yield self.build_ir_progam(), False
@@ -173,10 +171,5 @@ def test_check_output(self):
         self.check_pass_correct()
 
 
-class TestConv2dAdd2ActFusePatternWithCpu(TestConv2dAdd2ActFusePattern):
-    def setUp(self):
-        self.place_runtime = "cpu"
-
-
 if __name__ == "__main__":
     unittest.main()
@@ -18,14 +18,11 @@
 from pass_test import PassTest
 
 import paddle
+from paddle.base import core
 
 paddle.enable_static()
 
 
-@unittest.skipIf(
-    not paddle.base.core.is_compiled_with_cuda(),
-    "core is not complied with CUDA",
-)
 class TestConv2dAddFusePass(PassTest):
     r"""
     x_var   f_var
@@ -39,10 +36,10 @@ def is_program_valid(self, program=None):
         return True
 
     def build_ir_progam(self):
-        pir_program = None
         with paddle.pir_utils.IrGuard():
-            pir_program = paddle.static.Program()
-            with paddle.pir.core.program_guard(pir_program):
+            main_prog = paddle.static.Program()
+            start_prog = paddle.static.Program()
+            with paddle.pir.core.program_guard(main_prog, start_prog):
                 x = paddle.static.data(
                     name='x', shape=[3, 1, 28, 28], dtype='float32'
                 )
@@ -53,39 +50,37 @@ def build_ir_progam(self):
                     in_channels=1,
                     out_channels=32,
                     kernel_size=3,
-                    padding=1,
+                    padding="SAME",
                     data_format='NCHW',
                     bias_attr=False,
                 )
                 out = paddle.add(conv2d(x), y)
-
-        self.pass_list = ['conv2d_add_fuse_pass']
-        self.feeds = {
-            "x": np.random.random((3, 1, 28, 28)).astype("float32"),
-            "y": np.random.random((3, 32, 28, 28)).astype("float32"),
-        }
-        self.fetch_list = [out]
-        self.valid_op_map = {
-            "pd_op.fused_conv2d_add_act": 1,
-            "pd_op.conv2d": 0,
-            "pd_op.add": 0,
-        }
-        return pir_program
+                out = paddle.assign(out)
+                self.pass_list = ['conv2d_add_fuse_pass']
+                self.feeds = {
+                    "x": np.random.random((3, 1, 28, 28)).astype("float32"),
+                    "y": np.random.random((3, 32, 28, 28)).astype("float32"),
+                }
+                self.fetch_list = [out]
+                self.valid_op_map = {
+                    "pd_op.fused_conv2d_add_act": 1,
+                    "pd_op.conv2d": 0,
+                    "pd_op.add": 0,
+                }
+                return [main_prog, start_prog]
 
     def sample_program(self):
         """
         Yield one sample: the result of build_ir_progam paired with False,
         the flag check_pass_correct reads as need_translate_to_pir.
         """
         yield self.build_ir_progam(), False
 
     def setUp(self):
-        self.place_runtime = "gpu"
+        """
+        Choose the places to run on and turn off the accuracy comparison.
+        """
+        # Start from a fresh list instead of appending to the one created in
+        # setUpClass: appending there on every setUp would queue the same
+        # place once more for each test method that runs.
+        self.places = []
+        if core.is_compiled_with_cuda():
+            self.places.append(paddle.CUDAPlace(0))
+        # TODO(bukejiyu): This pass will support accuracy verification in the future
+        self.skip_accuracy_verification = True
 
     def test_check_output(self):
         self.check_pass_correct()
 
 
-class TestConv2dAddFusePassWtihCpu(TestConv2dAddFusePass):
-    def setUp(self):
-        self.place_runtime = "cpu"
-
-
 if __name__ == "__main__":
     unittest.main()