From 1cfad65d60efbab1cd5fb568b448f8d8cb1a283c Mon Sep 17 00:00:00 2001 From: SigureMo Date: Fri, 26 Apr 2024 11:22:37 +0000 Subject: [PATCH 1/6] [PIR] Throw error when OP has no grad OP --- python/paddle/autograd/ir_backward.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/python/paddle/autograd/ir_backward.py b/python/paddle/autograd/ir_backward.py index 4614856ed86ae9..f4879e35f2511b 100644 --- a/python/paddle/autograd/ir_backward.py +++ b/python/paddle/autograd/ir_backward.py @@ -55,6 +55,8 @@ """ __all__ = ['grad', 'calc_gradient', 'calc_gradient_helper'] +ALLOW_NO_GRAD_OPS = ["pd_op.full_like"] + def append_full_like(float_value, copy_value, value, state, backward_ops): with paddle.amp.auto_cast(enable=False): @@ -834,7 +836,10 @@ def append_yield( else: state.op_to_opgrad[op] = [] else: - logging.warning("%s op has no grad op", op.name()) + if op.name() not in ALLOW_NO_GRAD_OPS: + raise ValueError( + f"op {op.name()} has no grad op, consider enable prim to decompose it." + ) state.op_to_opgrad[op] = [] if fwd_block != bwd_block: @@ -1202,9 +1207,11 @@ def append_backward(loss, parameter_list=None, no_grad_set=None): input_inputs_grad.append( ( input, - input_to_inputgrad_map[input][0][0] - if input_to_inputgrad_map[input] != [] - else None, + ( + input_to_inputgrad_map[input][0][0] + if input_to_inputgrad_map[input] != [] + else None + ), ) ) From db8b5753f49910424530745c474ed406882482e3 Mon Sep 17 00:00:00 2001 From: SigureMo Date: Fri, 26 Apr 2024 11:33:19 +0000 Subject: [PATCH 2/6] add ut --- test/dygraph_to_static/test_high_order_net.py | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 test/dygraph_to_static/test_high_order_net.py diff --git a/test/dygraph_to_static/test_high_order_net.py b/test/dygraph_to_static/test_high_order_net.py new file mode 100644 index 00000000000000..9d116528ea649d --- /dev/null +++ b/test/dygraph_to_static/test_high_order_net.py @@ -0,0 +1,58 @@ +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +from dygraph_to_static_utils import ( + Dy2StTestBase, + test_ast_only, + test_pir_only, +) + +import paddle + + +class HighOrderNet(paddle.nn.Layer): + def __init__(self): + super().__init__() + self.linear = paddle.nn.Linear(3, 4, bias_attr=False) + + def forward(self, x): + y = self.linear(x) + z = paddle.pow(y, 2) + x_grad = paddle.grad(z, x, create_graph=True)[0] + x_grad_grad = paddle.grad(x_grad, x, create_graph=True)[0] + return x_grad_grad.mean() + + +class TestBackwardHasNoGradError(Dy2StTestBase): + @test_ast_only + @test_pir_only + def test_backward_has_no_grad_error(self): + net = HighOrderNet() + static_net = paddle.jit.to_static(net, full_graph=True) + + x = paddle.to_tensor([[1, 1, 1], [1, 1, 1]], 'float32') + x.stop_gradient = False + + with self.assertRaisesRegex( + ValueError, + "op 'pd_op.matmul_double_grad' has no grad op, consider enable prim to decompose it.", + ): + x_grad_grad = static_net(x) + x_grad_grad.backward() + + +if __name__ == "__main__": + unittest.main() From d68135f224175627d64f2734d056b8251031802d Mon Sep 17 00:00:00 2001 From: SigureMo Date: Fri, 26 Apr 2024 11:53:32 +0000 Subject: [PATCH 3/6] update error msg --- python/paddle/autograd/ir_backward.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/autograd/ir_backward.py b/python/paddle/autograd/ir_backward.py index f4879e35f2511b..413e0a3ad7a29d 100644 --- a/python/paddle/autograd/ir_backward.py +++ b/python/paddle/autograd/ir_backward.py @@ -838,7 +838,7 @@ def append_yield( else: if op.name() not in ALLOW_NO_GRAD_OPS: raise ValueError( - f"op {op.name()} has no grad op, consider enable prim to decompose it." + f"op '{op.name()}' has no grad op, consider enable prim to decompose it." ) state.op_to_opgrad[op] = [] From 5f6b38aa68b14067b769900597afe50eef16e1c9 Mon Sep 17 00:00:00 2001 From: SigureMo Date: Fri, 26 Apr 2024 14:31:56 +0000 Subject: [PATCH 4/6] update allow list --- python/paddle/autograd/ir_backward.py | 62 ++++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 2 deletions(-) diff --git a/python/paddle/autograd/ir_backward.py b/python/paddle/autograd/ir_backward.py index 413e0a3ad7a29d..f3826c0c50fb27 100644 --- a/python/paddle/autograd/ir_backward.py +++ b/python/paddle/autograd/ir_backward.py @@ -55,7 +55,62 @@ """ __all__ = ['grad', 'calc_gradient', 'calc_gradient_helper'] -ALLOW_NO_GRAD_OPS = ["pd_op.full_like"] +# TODO: Consider a better way to mark these ops has no grad op. +# Such as use a new trait to mark these ops. 
+ALLOW_NO_GRAD_OPS = [ + # Compare ops + "pd_op.equal", + "pd_op.equal_", + "pd_op.not_equal", + "pd_op.not_equal_", + "pd_op.less_than", + "pd_op.less_than_", + "pd_op.less_equal", + "pd_op.less_equal_", + "pd_op.greater_than", + "pd_op.greater_than_", + "pd_op.greater_equal", + "pd_op.greater_equal_", + # Logical ops + "pd_op.logical_and", + "pd_op.logical_and_", + "pd_op.logical_not", + "pd_op.logical_not_", + "pd_op.logical_or", + "pd_op.logical_or_", + "pd_op.logical_xor", + "pd_op.logical_xor_", + # Array ops + "pd_op.assign_array", + "pd_op.array_length", + "pd_op.slice_array", + "pd_op.slice_array_dense", + "pd_op.assign_array", + "pd_op.assign_array_", + "pd_op.create_array", + "pd_op.create_array_like", + "pd_op.array_read", + "pd_op.array_write_", + "pd_op.array_pop", + # Others + "pd_op.remainder", + "pd_op.argmax", + "pd_op.print", + "pd_op.accuracy", + "pd_op.uniform", + "pd_op.gaussian", + "pd_op.bernoulli", + "pd_op.full_like", + "pd_op.assign_value_", + "pd_op.nextafter", + "pd_op.isnan", + "pd_op.isinf", +] + + +def is_builtin_op(op): + dialect_name, opname = op.name().split(".") + return dialect_name == "builtin" def append_full_like(float_value, copy_value, value, state, backward_ops): @@ -836,7 +891,10 @@ def append_yield( else: state.op_to_opgrad[op] = [] else: - if op.name() not in ALLOW_NO_GRAD_OPS: + if ( + not is_builtin_op(op) + and op.name() not in ALLOW_NO_GRAD_OPS + ): raise ValueError( f"op '{op.name()}' has no grad op, consider enable prim to decompose it." ) From f7fa24a14ef1ace12ed28a8944dd19f77f7c1951 Mon Sep 17 00:00:00 2001 From: SigureMo Date: Fri, 26 Apr 2024 15:27:59 +0000 Subject: [PATCH 5/6] update allow list --- python/paddle/autograd/ir_backward.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/python/paddle/autograd/ir_backward.py b/python/paddle/autograd/ir_backward.py index f3826c0c50fb27..a500419713d114 100644 --- a/python/paddle/autograd/ir_backward.py +++ b/python/paddle/autograd/ir_backward.py @@ -80,6 +80,19 @@ "pd_op.logical_or_", "pd_op.logical_xor", "pd_op.logical_xor_", + # Bitwise ops + "pd_op.bitwise_and", + "pd_op.bitwise_and_", + "pd_op.bitwise_left_shift", + "pd_op.bitwise_left_shift_", + "pd_op.bitwise_not", + "pd_op.bitwise_not_", + "pd_op.bitwise_or", + "pd_op.bitwise_or_", + "pd_op.bitwise_right_shift", + "pd_op.bitwise_right_shift_", + "pd_op.bitwise_xor", + "pd_op.bitwise_xor_", # Array ops "pd_op.assign_array", "pd_op.array_length", @@ -105,6 +118,8 @@ "pd_op.nextafter", "pd_op.isnan", "pd_op.isinf", + "pd_op.all", + "pd_op.any", ] From 22d34ea5f8f24d3001b63bd94ab1d3f510f81479 Mon Sep 17 00:00:00 2001 From: SigureMo Date: Sun, 28 Apr 2024 02:47:29 +0000 Subject: [PATCH 6/6] move constants to backward_utils.py --- python/paddle/autograd/backward_utils.py | 72 +++++++++++++++++++++++ python/paddle/autograd/ir_backward.py | 74 +----------------------- 2 files changed, 74 insertions(+), 72 deletions(-) diff --git a/python/paddle/autograd/backward_utils.py b/python/paddle/autograd/backward_utils.py index bdd2756e09cd66..3eb4c01406704c 100644 --- a/python/paddle/autograd/backward_utils.py +++ b/python/paddle/autograd/backward_utils.py @@ -25,6 +25,73 @@ ) from paddle.base.wrapped_decorator import signature_safe_contextmanager +# TODO: Consider a better way to mark these ops has no grad op. +# Such as use a new trait to mark these ops. 
+ALLOW_NO_GRAD_OPS = [ + # Compare ops + "pd_op.equal", + "pd_op.equal_", + "pd_op.not_equal", + "pd_op.not_equal_", + "pd_op.less_than", + "pd_op.less_than_", + "pd_op.less_equal", + "pd_op.less_equal_", + "pd_op.greater_than", + "pd_op.greater_than_", + "pd_op.greater_equal", + "pd_op.greater_equal_", + # Logical ops + "pd_op.logical_and", + "pd_op.logical_and_", + "pd_op.logical_not", + "pd_op.logical_not_", + "pd_op.logical_or", + "pd_op.logical_or_", + "pd_op.logical_xor", + "pd_op.logical_xor_", + # Bitwise ops + "pd_op.bitwise_and", + "pd_op.bitwise_and_", + "pd_op.bitwise_left_shift", + "pd_op.bitwise_left_shift_", + "pd_op.bitwise_not", + "pd_op.bitwise_not_", + "pd_op.bitwise_or", + "pd_op.bitwise_or_", + "pd_op.bitwise_right_shift", + "pd_op.bitwise_right_shift_", + "pd_op.bitwise_xor", + "pd_op.bitwise_xor_", + # Array ops + "pd_op.assign_array", + "pd_op.array_length", + "pd_op.slice_array", + "pd_op.slice_array_dense", + "pd_op.assign_array", + "pd_op.assign_array_", + "pd_op.create_array", + "pd_op.create_array_like", + "pd_op.array_read", + "pd_op.array_write_", + "pd_op.array_pop", + # Others + "pd_op.remainder", + "pd_op.argmax", + "pd_op.print", + "pd_op.accuracy", + "pd_op.uniform", + "pd_op.gaussian", + "pd_op.bernoulli", + "pd_op.full_like", + "pd_op.assign_value_", + "pd_op.nextafter", + "pd_op.isnan", + "pd_op.isinf", + "pd_op.all", + "pd_op.any", +] + class ValueWrapper: def __init__(self, value) -> None: @@ -281,6 +348,11 @@ def is_control_flow(op): return op.name() == "pd_op.if" or op.name() == "pd_op.while" +def is_builtin_op(op): + dialect_name, opname = op.name().split(".") + return dialect_name == "builtin" + + def update_no_grad_set_by_stopgradient(block, no_grad_set): for op in block.ops: if is_control_flow(op): diff --git a/python/paddle/autograd/ir_backward.py b/python/paddle/autograd/ir_backward.py index a500419713d114..07e283b7617f79 100644 --- a/python/paddle/autograd/ir_backward.py +++ b/python/paddle/autograd/ir_backward.py @@ -18,6 +18,7 @@ import paddle.pir from paddle.autograd.backward_utils import ( + ALLOW_NO_GRAD_OPS, State, ValueDict, ValueSet, @@ -32,6 +33,7 @@ get_real_op_inputs, get_split_op, inverse_sort_op, + is_builtin_op, is_control_flow, is_inplace_net, parent_total_ops, @@ -55,78 +57,6 @@ """ __all__ = ['grad', 'calc_gradient', 'calc_gradient_helper'] -# TODO: Consider a better way to mark these ops has no grad op. -# Such as use a new trait to mark these ops. 
-ALLOW_NO_GRAD_OPS = [ - # Compare ops - "pd_op.equal", - "pd_op.equal_", - "pd_op.not_equal", - "pd_op.not_equal_", - "pd_op.less_than", - "pd_op.less_than_", - "pd_op.less_equal", - "pd_op.less_equal_", - "pd_op.greater_than", - "pd_op.greater_than_", - "pd_op.greater_equal", - "pd_op.greater_equal_", - # Logical ops - "pd_op.logical_and", - "pd_op.logical_and_", - "pd_op.logical_not", - "pd_op.logical_not_", - "pd_op.logical_or", - "pd_op.logical_or_", - "pd_op.logical_xor", - "pd_op.logical_xor_", - # Bitwise ops - "pd_op.bitwise_and", - "pd_op.bitwise_and_", - "pd_op.bitwise_left_shift", - "pd_op.bitwise_left_shift_", - "pd_op.bitwise_not", - "pd_op.bitwise_not_", - "pd_op.bitwise_or", - "pd_op.bitwise_or_", - "pd_op.bitwise_right_shift", - "pd_op.bitwise_right_shift_", - "pd_op.bitwise_xor", - "pd_op.bitwise_xor_", - # Array ops - "pd_op.assign_array", - "pd_op.array_length", - "pd_op.slice_array", - "pd_op.slice_array_dense", - "pd_op.assign_array", - "pd_op.assign_array_", - "pd_op.create_array", - "pd_op.create_array_like", - "pd_op.array_read", - "pd_op.array_write_", - "pd_op.array_pop", - # Others - "pd_op.remainder", - "pd_op.argmax", - "pd_op.print", - "pd_op.accuracy", - "pd_op.uniform", - "pd_op.gaussian", - "pd_op.bernoulli", - "pd_op.full_like", - "pd_op.assign_value_", - "pd_op.nextafter", - "pd_op.isnan", - "pd_op.isinf", - "pd_op.all", - "pd_op.any", -] - - -def is_builtin_op(op): - dialect_name, opname = op.name().split(".") - return dialect_name == "builtin" - def append_full_like(float_value, copy_value, value, state, backward_ops): with paddle.amp.auto_cast(enable=False):
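Taken together, the series makes the PIR backward pass fail fast instead of silently skipping a forward op that has no grad op: builtin-dialect ops and ops on ALLOW_NO_GRAD_OPS are tolerated, anything else now raises a ValueError suggesting prim decomposition. The sketch below is a minimal, Paddle-free illustration of that decision as it stands after patch 6. FakeOp, check_has_grad_op, has_vjp, and the trimmed allow list are stand-ins invented for this sketch only; the is_builtin_op logic, the allow-list membership test, and the error string are copied from the patches above.

# Standalone sketch (assumption: no Paddle import) of the no-grad-op check
# installed by this series. FakeOp and the trimmed allow list are
# illustrative stand-ins, not real Paddle objects.

ALLOW_NO_GRAD_OPS = ["pd_op.full_like", "pd_op.equal", "pd_op.argmax"]  # excerpt


class FakeOp:
    """Stand-in for a pir.Operation exposing only name()."""

    def __init__(self, name: str) -> None:
        self._name = name

    def name(self) -> str:
        return self._name


def is_builtin_op(op: FakeOp) -> bool:
    # Mirrors the helper added in patch 4 and moved to backward_utils.py in
    # patch 6: ops from the "builtin" dialect never need a grad op.
    dialect_name, _ = op.name().split(".")
    return dialect_name == "builtin"


def check_has_grad_op(op: FakeOp, has_vjp: bool) -> None:
    # has_vjp stands for "a grad op / vjp rule was found for this op".
    if has_vjp:
        return
    if not is_builtin_op(op) and op.name() not in ALLOW_NO_GRAD_OPS:
        raise ValueError(
            f"op '{op.name()}' has no grad op, consider enable prim to decompose it."
        )


if __name__ == "__main__":
    check_has_grad_op(FakeOp("builtin.combine"), has_vjp=False)   # tolerated: builtin dialect
    check_has_grad_op(FakeOp("pd_op.full_like"), has_vjp=False)   # tolerated: allow-listed
    try:
        check_has_grad_op(FakeOp("pd_op.matmul_double_grad"), has_vjp=False)
    except ValueError as e:
        print(e)  # the message the new unit test asserts on

In the real code path this check runs inside ir_backward.py while building the backward program, which is why the added test_high_order_net.py hits it: the second-order paddle.grad call under PIR to_static reaches pd_op.matmul_double_grad, which has no registered grad op, and the test asserts that the ValueError (rather than the old logging.warning) is raised.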