diff --git a/paddle/fluid/eager/api/manual/eager_manual/forwards/multiply_fwd_func.cc b/paddle/fluid/eager/api/manual/eager_manual/forwards/multiply_fwd_func.cc index 4c03ee6ef486b1..b79953b9b35b93 100644 --- a/paddle/fluid/eager/api/manual/eager_manual/forwards/multiply_fwd_func.cc +++ b/paddle/fluid/eager/api/manual/eager_manual/forwards/multiply_fwd_func.cc @@ -140,7 +140,7 @@ paddle::Tensor multiply_ad_func(const paddle::Tensor& x, } // Forward API Call - auto api_result = paddle::experimental::multiply(x, y); + auto api_result = paddle::experimental::multiply(x, y, input_out); // Check NaN and Inf if needed if (FLAGS_check_nan_inf) { diff --git a/paddle/phi/ops/yaml/ops.yaml b/paddle/phi/ops/yaml/ops.yaml index b5f4d6371a82b1..fa5194c1f52fa9 100644 --- a/paddle/phi/ops/yaml/ops.yaml +++ b/paddle/phi/ops/yaml/ops.yaml @@ -1209,6 +1209,10 @@ - op : cos args : (Tensor x) + python_api: + name: [paddle.cos, paddle.Tensor.cos] + args_alias: + use_default_mapping : True output : Tensor(out) infer_meta : func : UnchangedInferMeta @@ -2168,6 +2172,10 @@ - op : floor args : (Tensor x) + python_api: + name: [paddle.floor, paddle.Tensor.floor] + args_alias: + use_default_mapping : True output : Tensor(out) infer_meta : func : UnchangedInferMeta @@ -3164,6 +3172,10 @@ - op : log args : (Tensor x) + python_api: + name: [paddle.log, paddle.Tensor.log] + args_alias: + use_default_mapping : True output : Tensor(out) infer_meta : func : UnchangedInferMeta @@ -4662,6 +4674,10 @@ - op : rsqrt args : (Tensor x) + python_api: + name: [paddle.sqrt, paddle.Tensor.rsqrt] + args_alias: + use_default_mapping : True output : Tensor(out) infer_meta : func : UnchangedInferMeta @@ -4954,6 +4970,10 @@ - op : sign args : (Tensor x) + python_api : + name: [paddle.sign, paddle.Tensor.sign] + args_alias: + use_default_mapping : True output : Tensor(out) infer_meta : func : UnchangedInferMeta @@ -4978,6 +4998,10 @@ - op : sin args : (Tensor x) + python_api : + name: [paddle.sin, paddle.Tensor.sin] + args_alias: + use_default_mapping : True output : Tensor(out) infer_meta : func : UnchangedInferMeta diff --git a/python/paddle/_paddle_docs.py b/python/paddle/_paddle_docs.py index 29e551e4a4841e..36aa279f28b484 100644 --- a/python/paddle/_paddle_docs.py +++ b/python/paddle/_paddle_docs.py @@ -509,6 +509,222 @@ def isnan( # shenwei # zhouxin +add_doc_and_signature( + "sin", + """ + Sine Activation Operator. + + .. math:: + out = sin(x) + + Args: + x (Tensor): Input of Sin operator, an N-D Tensor, with data type float32, float64, float16, bfloat16, + uint8, int8, int16, int32, int64, complex64 or complex128. Alias: ``input``. + name (str|None, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. + out (Tensor, optional): The output tensor. If set, the result will be stored in this tensor. Default is None. + + Returns: + Tensor. Output of Sin operator, a Tensor with shape same as input + (integer types are autocasted into float32). + + Examples: + .. 
code-block:: python + + >>> import paddle + + >>> x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) + >>> out = paddle.sin(x) + >>> print(out) + Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True, + [-0.38941833, -0.19866933, 0.09983342, 0.29552022]) + """, + """ +def sin( + x: Tensor, name: str | None = None, *, out: Tensor | None = None +) -> Tensor + """, +) + +add_doc_and_signature( + "sign", + """ + Returns sign of every element in `x`: For real numbers, 1 for positive, -1 for negative and 0 for zero. For complex numbers, the return value is a complex number with unit magnitude. If a complex number element is zero, the result is 0+0j. + + Args: + x (Tensor): The input tensor. The data type can be uint8, int8, int16, int32, int64, bfloat16, float16, float32, float64, complex64 or complex128. Alias: ``input``. + name (str|None, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. + out (Tensor|None, optional): The output tensor. If set, the result will be stored in this tensor. Default is None. + + Returns: + Tensor: The output sign tensor with identical shape and data type to the input :attr:`x`. + + Examples: + .. code-block:: python + + >>> import paddle + + >>> x = paddle.to_tensor([3.0, 0.0, -2.0, 1.7], dtype='float32') + >>> out = paddle.sign(x=x) + >>> out + Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True, + [ 1., 0., -1., 1.]) + """, + """ +def sign( + x: Tensor, name: str | None = None, *, out: Tensor | None = None +) -> Tensor + """, +) + +add_doc_and_signature( + "log", + r""" + Calculates the natural log of the given input Tensor, element-wise. + + .. math:: + + Out = \ln(x) + + Args: + x (Tensor): Input Tensor. Must be one of the following types: int32, int64, float16, bfloat16, float32, float64, complex64, complex128. Alias: ``input``. + name (str|None): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name` + out (Tensor, optional): The output Tensor. If set, the result will be stored in this tensor. Default is None. + + + Returns: + Tensor: The natural log of the input Tensor computed element-wise. + + Examples: + + .. code-block:: python + + >>> import paddle + + >>> x = [[2, 3, 4], [7, 8, 9]] + >>> x = paddle.to_tensor(x, dtype='float32') + >>> print(paddle.log(x)) + Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True, + [[0.69314718, 1.09861231, 1.38629436], + [1.94591010, 2.07944155, 2.19722462]]) + """, + """ +def log( + x: Tensor, name: str | None = None, *, out: Tensor | None = None +) -> Tensor + """, +) + +add_doc_and_signature( + "rsqrt", + """ + Rsqrt Activation Operator. + + Please make sure input is legal in case of numeric errors. + + .. math:: + out = \\frac{1}{\\sqrt{x}} + + Args: + x (Tensor): Input of Rsqrt operator, an N-D Tensor, with data type float32, float64, float16, bfloat16, + uint8, int8, int16, int32, int64. Alias: ``input``. + name (str|None, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. + out (Tensor, optional): The output tensor. If set, the result will be stored in this tensor. Default is None. + + Returns: + Tensor. Output of Rsqrt operator, a Tensor with shape same as input + (integer types are autocasted into float32). + + Examples: + .. 
code-block:: python + + >>> import paddle + + >>> x = paddle.to_tensor([0.1, 0.2, 0.3, 0.4]) + >>> out = paddle.rsqrt(x) + >>> print(out) + Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True, + [3.16227770, 2.23606801, 1.82574177, 1.58113885]) + """, + """ +def rsqrt( + x: Tensor, name: str | None = None, *, out: Tensor | None = None +) -> Tensor + """, +) + +add_doc_and_signature( + "cos", + """ + Cosine Operator. Computes cosine of x element-wise. + + Input range is `(-inf, inf)` and output range is `[-1,1]`. + + .. math:: + out = cos(x) + + Args: + x (Tensor): Input of Cos operator, an N-D Tensor, with data type float32, float64, float16, bfloat16, + uint8, int8, int16, int32, int64, complex64, complex128. Alias: ``input``. + name (str|None, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. + out (Tensor, optional): The output tensor. If set, the result will be stored in this tensor. Default is None. + + Returns: + Tensor. Output of Cos operator, a Tensor with shape same as input + (integer types are autocasted into float32). + + Examples: + .. code-block:: python + + >>> import paddle + + >>> x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) + >>> out = paddle.cos(x) + >>> print(out) + Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True, + [0.92106098, 0.98006660, 0.99500418, 0.95533651]) + """, + """ +def cos( + x: Tensor, name: str | None = None, *, out: Tensor | None = None +) -> Tensor + """, +) + +add_doc_and_signature( + "floor", + """ + Floor Activation Operator. Computes floor of x element-wise. + + .. math:: + out = \\lfloor x \\rfloor + + Args: + x (Tensor): Input of Floor operator, an N-D Tensor, with data type float32, float64, float16, bfloat16, + uint8, int8, int16, int32, int64. Alias: ``input``. + name (str|None, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. + out (Tensor, optional): The output tensor. If set, the result will be stored in this tensor. Default is None. + + Returns: + Tensor. Output of Floor operator, a Tensor with shape same as input + (integer types are autocasted into float32). + + Examples: + .. 
code-block:: python + + >>> import paddle + + >>> x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) + >>> out = paddle.floor(x) + >>> print(out) + Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True, + [-1., -1., 0., 0.]) + """, + """ +def floor( + x: Tensor, name: str | None = None, *, out: Tensor | None = None +) -> Tensor + """, +) # hehongyu diff --git a/python/paddle/nn/quant/functional_layers.py b/python/paddle/nn/quant/functional_layers.py index 670984fe4f9c78..880304913e0e8c 100644 --- a/python/paddle/nn/quant/functional_layers.py +++ b/python/paddle/nn/quant/functional_layers.py @@ -28,7 +28,7 @@ def __init__(self): super().__init__() def forward(self, x, y, name=None): - return math.add(x, y, name) + return math.add(x, y, name=name) class subtract(FloatFunctionalLayer): @@ -36,7 +36,7 @@ def __init__(self): super().__init__() def forward(self, x, y, name=None): - return math.subtract(x, y, name) + return math.subtract(x, y, name=name) class multiply(FloatFunctionalLayer): @@ -44,7 +44,7 @@ def __init__(self): super().__init__() def forward(self, x, y, name=None): - return math.multiply(x, y, name) + return math.multiply(x, y, name=name) class divide(FloatFunctionalLayer): @@ -52,7 +52,7 @@ def __init__(self): super().__init__() def forward(self, x, y, name=None): - return math.divide(x, y, name) + return math.divide(x, y, name=name) class reshape(FloatFunctionalLayer): @@ -60,7 +60,7 @@ def __init__(self): super().__init__() def forward(self, x, shape, name=None): - return manipulation.reshape(x, shape, name) + return manipulation.reshape(x, shape, name=name) class transpose(FloatFunctionalLayer): @@ -68,7 +68,7 @@ def __init__(self): super().__init__() def forward(self, x, perm, name=None): - return manipulation.transpose(x, perm, name) + return manipulation.transpose(x, perm, name=name) class concat(FloatFunctionalLayer): @@ -76,7 +76,7 @@ def __init__(self): super().__init__() def forward(self, x, axis=0, name=None): - return manipulation.concat(x, axis, name) + return manipulation.concat(x, axis, name=name) class flatten(FloatFunctionalLayer): @@ -84,7 +84,7 @@ def __init__(self): super().__init__() def forward(self, x, start_axis=0, stop_axis=-1, name=None): - return manipulation.flatten(x, start_axis, stop_axis, name) + return manipulation.flatten(x, start_axis, stop_axis, name=name) class matmul(FloatFunctionalLayer): @@ -92,4 +92,4 @@ def __init__(self): super().__init__() def forward(self, x, y, transpose_x=False, transpose_y=False, name=None): - return linalg.matmul(x, y, transpose_x, transpose_y, name) + return linalg.matmul(x, y, transpose_x, transpose_y, name=name) diff --git a/python/paddle/tensor/creation.py b/python/paddle/tensor/creation.py index a2415681ea1c3e..92bc5dce6e949e 100644 --- a/python/paddle/tensor/creation.py +++ b/python/paddle/tensor/creation.py @@ -3558,7 +3558,11 @@ def _memcpy(input, place=None, output=None) -> paddle.Tensor: def complex( - real: paddle.Tensor, imag: paddle.Tensor, out=None, name: str | None = None + real: paddle.Tensor, + imag: paddle.Tensor, + name: str | None = None, + *, + out: paddle.Tensor | None = None, ) -> paddle.Tensor: """Return a complex tensor given the real and image component. 
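
A minimal usage sketch of the paddle.complex signature change above: name stays positional-or-keyword while out becomes keyword-only, and — per the wording of the added docstrings — the result is stored in the tensor passed as out (that storage behavior is taken from the docstring text and is an assumption here, not verified against the kernel):

    import paddle

    real = paddle.to_tensor([1.0, 2.0])
    imag = paddle.to_tensor([3.0, 4.0])

    c1 = paddle.complex(real, imag)            # unchanged positional form
    # new keyword-only out=: reuse c1 as the destination buffer
    c2 = paddle.complex(real, imag, out=c1)    # result written into c1 per the added docstring
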
@@ -3780,14 +3784,19 @@ def triu_indices( def polar( - abs: paddle.Tensor, angle: paddle.Tensor, name: str | None = None + abs: paddle.Tensor, + angle: paddle.Tensor, + name: str | None = None, + *, + out: paddle.Tensor | None = None, ) -> paddle.Tensor: """Return a Cartesian coordinates corresponding to the polar coordinates complex tensor given the ``abs`` and ``angle`` component. Args: abs (Tensor): The abs component. The data type should be 'float32' or 'float64'. angle (Tensor): The angle component. The data type should be the same as ``abs``. - name(str|None, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None. + name (str|None, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None. + out (Tensor, optional): The output tensor. If set, the result will be stored in this tensor. Default is None. Returns: Tensor, The output tensor. The data type is 'complex64' or 'complex128', with the same precision as ``abs`` and ``angle``. @@ -3816,7 +3825,9 @@ def polar( angle, 'angle', ['float32', 'float64'], 'paddle.polar' ) - return paddle.complex(abs * paddle.cos(angle), abs * paddle.sin(angle)) + return paddle.complex( + abs * paddle.cos(angle), abs * paddle.sin(angle), out=out, name=name + ) @dygraph_only diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index 403f48d17c2334..06189340be11df 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -2199,8 +2199,13 @@ def roll( return out +@ParamAliasDecorator({"x": ["tensors"], "axis": ["dim"]}) def stack( - x: Sequence[Tensor], axis: int = 0, name: str | None = None + x: Sequence[Tensor], + axis: int = 0, + name: str | None = None, + *, + out: Tensor | None = None, ) -> Tensor: """ Stacks all the input tensors ``x`` along ``axis`` dimension. @@ -2296,11 +2301,12 @@ def stack( Args: x (list[Tensor]|tuple[Tensor]): Input ``x`` can be a ``list`` or ``tuple`` of tensors, the Tensors in ``x`` - must be of the same shape and dtype. Supported data types: float32, float64, int32, int64. + must be of the same shape and dtype. Supported data types: float32, float64, int32, int64. Alias: ``tensors``. axis (int, optional): The axis along which all inputs are stacked. ``axis`` range is ``[-(R+1), R+1)``, where ``R`` is the number of dimensions of the first input tensor ``x[0]``. - If ``axis < 0``, ``axis = axis+R+1``. The default value of axis is 0. + If ``axis < 0``, ``axis = axis+R+1``. The default value of axis is 0. Alias: ``dim``. name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. + out (Tensor, optional): The output tensor. If set, the output will be written to this tensor. Returns: Tensor, The stacked tensor with same data type as input. @@ -2354,7 +2360,7 @@ def stack( axis = 0 if axis is None else axis if in_dynamic_mode(): - return _C_ops.stack(x, axis) + return _C_ops.stack(x, axis, out=out) if not isinstance(x, list) and not isinstance(x, tuple): # NOTE:(zhiqiu) Only support Variable as input if the Variable is a DENSE_TENSOR_ARRAY create by create_array, array_write, array_read, etc. 
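
A short sketch of the paddle.stack changes above: ParamAliasDecorator({"x": ["tensors"], "axis": ["dim"]}) lets torch-style keyword names be used, and the new keyword-only out= is forwarded to _C_ops.stack; writing the result into the provided tensor is assumed to follow the added docstring ("the output will be written to this tensor"):

    import paddle

    a = paddle.to_tensor([1.0, 2.0])
    b = paddle.to_tensor([3.0, 4.0])

    # alias names registered by the decorator: tensors -> x, dim -> axis
    s = paddle.stack(tensors=[a, b], dim=0)

    # keyword-only out=: stack into a preallocated tensor of matching shape/dtype
    buf = paddle.empty([2, 2], dtype='float32')
    paddle.stack([a, b], axis=0, out=buf)
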
diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py index 1f84b1d6067e4f..6a79355cdc4823 100644 --- a/python/paddle/tensor/math.py +++ b/python/paddle/tensor/math.py @@ -29,6 +29,9 @@ isfinite, isinf, isnan, + log, + sign, + sin, ) from paddle.base.libpaddle import DataType from paddle.common_ops_import import VarDesc, dygraph_utils @@ -91,7 +94,6 @@ rsqrt_, sigmoid, sigmoid_, - sin, sin_, sinh, sinh_, @@ -162,61 +164,6 @@ def _get_reduce_axis_with_tensor(axis, x): return reduce_all, axis -def log(x: Tensor, name: str | None = None) -> Tensor: - r""" - Calculates the natural log of the given input Tensor, element-wise. - - .. math:: - - Out = \ln(x) - - Args: - x (Tensor): Input Tensor. Must be one of the following types: int32, int64, float16, bfloat16, float32, float64, complex64, complex128. - name (str|None): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name` - - - Returns: - Tensor: The natural log of the input Tensor computed element-wise. - - Examples: - - .. code-block:: python - - >>> import paddle - - >>> x = [[2, 3, 4], [7, 8, 9]] - >>> x = paddle.to_tensor(x, dtype='float32') - >>> print(paddle.log(x)) - Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True, - [[0.69314718, 1.09861231, 1.38629436], - [1.94591010, 2.07944155, 2.19722462]]) - """ - if in_dynamic_or_pir_mode(): - return _C_ops.log(x) - else: - check_variable_and_dtype( - x, - 'x', - [ - 'int32', - 'int64', - 'uint16', - 'float16', - 'float32', - 'float64', - 'complex64', - 'complex128', - ], - "log", - ) - inputs = {'X': [x]} - helper = LayerHelper('log', **locals()) - dtype = helper.input_dtype(input_param_name='x') - out = helper.create_variable_for_type_inference(dtype) - helper.append_op(type="log", inputs={"X": x}, outputs={"Out": out}) - return out - - @inplace_apis_in_dygraph_only def log_(x: Tensor, name: str | None = None) -> Tensor: r""" @@ -528,7 +475,13 @@ def scale_( @ParamAliasDecorator({"x": ["input"], "y": ["exponent"]}) -def pow(x: Tensor, y: float | Tensor, name: str | None = None) -> Tensor: +def pow( + x: Tensor, + y: float | Tensor, + name: str | None = None, + *, + out: Tensor | None = None, +) -> Tensor: """ Compute the power of Tensor elements. The equation is: @@ -550,6 +503,7 @@ def pow(x: Tensor, y: float | Tensor, name: str | None = None) -> Tensor: y (float|int|Tensor): If it is an N-D Tensor, its data type should be the same as `x`. exponent: An alias for ``y`` , with identical behavior. name (str|None, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. + out (Tensor, optional): The output tensor. If set, the result will be stored in this tensor. Default is None. Returns: N-D Tensor. A location into which the result is stored. Its dimension and data type are the same as `x`. 
@@ -584,9 +538,9 @@ def pow(x: Tensor, y: float | Tensor, name: str | None = None) -> Tensor: # in dynamic graph mode if in_dynamic_or_pir_mode(): if isinstance(y, (int, float)): - return _C_ops.pow(x, y) + return _C_ops.pow(x, y, out=out) elif isinstance(y, (paddle.Tensor, Variable, paddle.pir.Value)): - return _C_ops.elementwise_pow(x, y) + return _C_ops.elementwise_pow(x, y, out=out) else: raise TypeError( f"y must be scalar, Tensor(in dygraph mode), Value(in pir mode) but received: {type(y)}" @@ -1245,7 +1199,9 @@ def remainder_(x: Tensor, y: Tensor, name: str | None = None) -> Tensor: """ -def multiply(x: Tensor, y: Tensor, name: str | None = None) -> Tensor: +def multiply( + x: Tensor, y: Tensor, name: str | None = None, *, out: Tensor | None = None +) -> Tensor: """ multiply two tensors element-wise. The equation is: @@ -1264,6 +1220,7 @@ def multiply(x: Tensor, y: Tensor, name: str | None = None) -> Tensor: x (Tensor): the input tensor, its data type should be one of bfloat16, float16, float32, float64, int32, int64, bool, complex64, complex128. y (Tensor): the input tensor, its data type should be one of bfloat16, float16, float32, float64, int32, int64, bool, complex64, complex128. name (str|None, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. + out (Tensor|None, optional): The output tensor. If set, the result will be stored in this tensor. Default is None. Returns: N-D Tensor. A location into which the result is stored. If :attr:`x`, :attr:`y` have different shapes and are "broadcastable", the resulting tensor shape is the shape of :attr:`x` and :attr:`y` after broadcasting. If :attr:`x`, :attr:`y` have the same shape, its shape is the same as :attr:`x` and :attr:`y`. @@ -1291,7 +1248,7 @@ def multiply(x: Tensor, y: Tensor, name: str | None = None) -> Tensor: """ if in_dynamic_or_pir_mode(): - return _C_ops.multiply(x, y) + return _C_ops.multiply(x, y, out=out) else: return _elementwise_op(LayerHelper('elementwise_mul', **locals())) @@ -4793,57 +4750,6 @@ def prod( return out -def sign(x: Tensor, name: str | None = None) -> Tensor: - """ - Returns sign of every element in `x`: For real numbers, 1 for positive, -1 for negative and 0 for zero. For complex numbers, the return value is a complex number with unit magnitude. If a complex number element is zero, the result is 0+0j. - - Args: - x (Tensor): The input tensor. The data type can be uint8, int8, int16, int32, int64, bfloat16, float16, float32, float64, complex64 or complex128. - name (str|None, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. - - Returns: - Tensor: The output sign tensor with identical shape and data type to the input :attr:`x`. - - Examples: - .. 
code-block:: python - - >>> import paddle - - >>> x = paddle.to_tensor([3.0, 0.0, -2.0, 1.7], dtype='float32') - >>> out = paddle.sign(x=x) - >>> out - Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True, - [ 1., 0., -1., 1.]) - """ - if in_dynamic_or_pir_mode(): - return _C_ops.sign(x) - else: - check_variable_and_dtype( - x, - 'x', - [ - 'uint8', - 'int8', - 'int16', - 'int32', - 'int64', - 'float16', - 'bfloat16', - 'float32', - 'float64', - 'complex64', - 'complex128', - ], - 'sign', - ) - helper = LayerHelper("sign", **locals()) - out = helper.create_variable_for_type_inference(dtype=x.dtype) - - helper.append_op(type='sign', inputs={'X': [x]}, outputs={'Out': [out]}) - - return out - - def tanh(x: Tensor, name: str | None = None) -> Tensor: r""" Tanh Activation Operator. diff --git a/python/paddle/tensor/ops.py b/python/paddle/tensor/ops.py index 642e2380fa749d..bfac6f015d02da 100644 --- a/python/paddle/tensor/ops.py +++ b/python/paddle/tensor/ops.py @@ -15,6 +15,12 @@ from typing import TYPE_CHECKING +from paddle._C_ops import ( # noqa: F401 + cos, + floor, + rsqrt, + sin, +) from paddle.utils.inplace_utils import inplace_apis_in_dygraph_only from .. import _C_ops @@ -474,62 +480,6 @@ def ceil(x: Tensor, name: str | None = None) -> Tensor: return out -def cos(x: Tensor, name: str | None = None) -> Tensor: - """ - Cosine Operator. Computes cosine of x element-wise. - - Input range is `(-inf, inf)` and output range is `[-1,1]`. - - .. math:: - out = cos(x) - - Args: - x (Tensor): Input of Cos operator, an N-D Tensor, with data type float32, float64, float16, bfloat16, - uint8, int8, int16, int32, int64, complex64, complex128. - name (str|None, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. - - Returns: - Tensor. Output of Cos operator, a Tensor with shape same as input - (integer types are autocasted into float32). - - Examples: - .. code-block:: python - - >>> import paddle - - >>> x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) - >>> out = paddle.cos(x) - >>> print(out) - Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True, - [0.92106098, 0.98006660, 0.99500418, 0.95533651]) - """ - if in_dynamic_or_pir_mode(): - return _C_ops.cos(x) - else: - check_variable_and_dtype( - x, - 'x', - [ - 'float16', - 'uint16', - 'float32', - 'float64', - 'uint8', - 'int8', - 'int16', - 'int32', - 'int64', - 'complex64', - 'complex128', - ], - 'cos', - ) - helper = LayerHelper('cos', **locals()) - out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op(type='cos', inputs={"X": x}, outputs={"Out": out}) - return out - - def cosh(x: Tensor, name: str | None = None) -> Tensor: """ Cosh Activation Operator. @@ -686,58 +636,6 @@ def expm1(x: Tensor, name: str | None = None) -> Tensor: return out -def floor(x: Tensor, name: str | None = None) -> Tensor: - """ - - Floor Activation Operator. Computes floor of x element-wise. - - .. math:: - out = \\lfloor x \\rfloor - - Args: - x (Tensor): Input of Floor operator, an N-D Tensor, with data type float32, float64, float16, bfloat16, - uint8, int8, int16, int32, int64. - name (str|None, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. - - Returns: - Tensor. Output of Floor operator, a Tensor with shape same as input - - Examples: - .. 
code-block:: python - - >>> import paddle - - >>> x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) - >>> out = paddle.floor(x) - >>> print(out) - Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True, - [-1., -1., 0., 0.]) - """ - if in_dynamic_or_pir_mode(): - return _C_ops.floor(x) - else: - check_variable_and_dtype( - x, - 'x', - [ - 'float16', - 'uint16', - 'float32', - 'float64', - 'uint8', - 'int8', - 'int16', - 'int32', - 'int64', - ], - 'floor', - ) - helper = LayerHelper('floor', **locals()) - out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op(type='floor', inputs={"X": x}, outputs={"Out": out}) - return out - - def reciprocal(x: Tensor, name: str | None = None) -> Tensor: """ @@ -865,60 +763,6 @@ def round_(x, decimals=0, name=None): return _C_ops.round_(x, decimals) -def rsqrt(x: Tensor, name: str | None = None) -> Tensor: - """ - Rsqrt Activation Operator. - - Please make sure input is legal in case of numeric errors. - - .. math:: - out = \\frac{1}{\\sqrt{x}} - - Args: - x (Tensor): Input of Rsqrt operator, an N-D Tensor, with data type float32, float64, float16, bfloat16, - uint8, int8, int16, int32, int64. - name (str|None, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. - - Returns: - Tensor. Output of Rsqrt operator, a Tensor with shape same as input - (integer types are autocasted into float32). - - Examples: - .. code-block:: python - - >>> import paddle - - >>> x = paddle.to_tensor([0.1, 0.2, 0.3, 0.4]) - >>> out = paddle.rsqrt(x) - >>> print(out) - Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True, - [3.16227770, 2.23606801, 1.82574177, 1.58113885]) - """ - if in_dynamic_or_pir_mode(): - return _C_ops.rsqrt(x) - else: - check_variable_and_dtype( - x, - 'x', - [ - 'float16', - 'uint16', - 'float32', - 'float64', - 'uint8', - 'int8', - 'int16', - 'int32', - 'int64', - ], - 'rsqrt', - ) - helper = LayerHelper('rsqrt', **locals()) - out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op(type='rsqrt', inputs={"X": x}, outputs={"Out": out}) - return out - - def sigmoid(x: Tensor, name: str | None = None) -> Tensor: """ Sigmoid Activation. @@ -974,60 +818,6 @@ def sigmoid(x: Tensor, name: str | None = None) -> Tensor: return out -def sin(x: Tensor, name: str | None = None) -> Tensor: - """ - Sine Activation Operator. - - .. math:: - out = sin(x) - - Args: - x (Tensor): Input of Sin operator, an N-D Tensor, with data type float32, float64, float16, bfloat16, - uint8, int8, int16, int32, int64, complex64 or complex128. - name (str|None, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. - - Returns: - Tensor. Output of Sin operator, a Tensor with shape same as input - (integer types are autocasted into float32). - - Examples: - .. 
code-block:: python - - >>> import paddle - - >>> x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) - >>> out = paddle.sin(x) - >>> print(out) - Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True, - [-0.38941833, -0.19866933, 0.09983342, 0.29552022]) - """ - if in_dynamic_or_pir_mode(): - return _C_ops.sin(x) - else: - check_variable_and_dtype( - x, - 'x', - [ - 'float16', - 'uint16', - 'float32', - 'float64', - 'uint8', - 'int8', - 'int16', - 'int32', - 'int64', - 'complex64', - 'complex128', - ], - 'sin', - ) - helper = LayerHelper('sin', **locals()) - out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op(type='sin', inputs={"X": x}, outputs={"Out": out}) - return out - - def sinh(x: Tensor, name: str | None = None) -> Tensor: """ Sinh Activation Operator. diff --git a/test/deprecated/legacy_test/test_learning_rate_scheduler_deprecated.py b/test/deprecated/legacy_test/test_learning_rate_scheduler_deprecated.py index 6e6f1fe01a34f8..27b06f946882cc 100644 --- a/test/deprecated/legacy_test/test_learning_rate_scheduler_deprecated.py +++ b/test/deprecated/legacy_test/test_learning_rate_scheduler_deprecated.py @@ -12,16 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import copy import math -import os import unittest import numpy as np import paddle from paddle import base -from paddle.base import core, framework def exponential_decay( @@ -384,134 +381,6 @@ def test_LambdaDecay(self): lr = paddle.optimizer.lr.LambdaDecay(learning_rate, "test") -class TestLearningRateDecay(unittest.TestCase): - def check_decay(self, python_decay_fn, base_decay_fn, kwargs): - places = [] - if ( - os.environ.get('FLAGS_CI_both_cpu_and_gpu', 'False').lower() - in ['1', 'true', 'on'] - or not core.is_compiled_with_cuda() - ): - places.append(base.CPUPlace()) - if core.is_compiled_with_cuda(): - places.append(base.CUDAPlace(0)) - for place in places: - self.check_decay_with_place( - place, python_decay_fn, base_decay_fn, kwargs - ) - - def check_decay_with_place( - self, place, python_decay_fn, base_decay_fn, kwargs - ): - main_prog = base.Program() - startup_prog = base.Program() - - with base.program_guard(main_prog, startup_prog): - decayed_lr = base_decay_fn(**kwargs) - - place = base.CPUPlace() - exe = base.Executor(place) - - exe.run(startup_prog) - - for step in range(10): - # Step of NoamDecay starts from 1. 
- if python_decay_fn.__name__ == 'noam_decay': - step += 1 - (lr_val,) = exe.run(main_prog, feed={}, fetch_list=[decayed_lr]) - python_decayed_lr = python_decay_fn( - global_step=float(step), **kwargs - ) - self.assertAlmostEqual( - python_decayed_lr, - lr_val[0], - places=6, - msg=f'Failed lr scheduler is {python_decay_fn.__name__}, step {step}, Python result is {python_decayed_lr}, Fluid result is {lr_val[0]}', - ) - - def test_decay(self): - common_kwargs_true = { - "learning_rate": 1.0, - "decay_steps": 5, - "decay_rate": 0.5, - "staircase": True, - } - common_kwargs_false = copy.deepcopy(common_kwargs_true) - common_kwargs_false["staircase"] = False - - decay_fns = [ - ( - exponential_decay, - paddle.optimizer.lr.exponential_decay, - common_kwargs_true, - ), - ( - exponential_decay, - paddle.optimizer.lr.exponential_decay, - common_kwargs_false, - ), - ( - natural_exp_decay, - paddle.optimizer.lr.natural_exp_decay, - common_kwargs_true, - ), - ( - natural_exp_decay, - paddle.optimizer.lr.natural_exp_decay, - common_kwargs_false, - ), - ( - inverse_time_decay, - paddle.optimizer.lr.inverse_time_decay, - common_kwargs_true, - ), - ( - inverse_time_decay, - paddle.optimizer.lr.inverse_time_decay, - common_kwargs_false, - ), - ( - polynomial_decay, - paddle.optimizer.lr.polynomial_decay, - {"learning_rate": 1.0, "decay_steps": 5, "cycle": True}, - ), - ( - polynomial_decay, - paddle.optimizer.lr.polynomial_decay, - {"learning_rate": 1.0, "decay_steps": 5, "cycle": False}, - ), - ( - piecewise_decay, - paddle.optimizer.lr.piecewise_decay, - {"boundaries": [3, 6, 9], "values": [0.1, 0.2, 0.3, 0.4]}, - ), - ( - cosine_decay, - paddle.optimizer.lr.cosine_decay, - {"learning_rate": 0.1, "step_each_epoch": 100, "epochs": 120}, - ), - ( - noam_decay, - paddle.optimizer.lr.noam_decay, - {"d_model": 0.01, "warmup_steps": 200, "learning_rate": 2.0}, - ), - ] - - for py_decay_fn, base_decay_fn, kwargs in decay_fns: - print( - "class=" - + self.__class__.__name__ - + " decay_fn=" - + py_decay_fn.__name__ - + " kwargs=" - + str(kwargs) - ) - main_program = framework.Program() - startup_program = framework.Program() - with framework.program_guard(main_program, startup_program): - self.check_decay(py_decay_fn, base_decay_fn, kwargs) - - class TestLinearWamrupLearningRateDecay(unittest.TestCase): def check_decay_with_place( self, place, python_decay_fn, base_decay_fn, kwargs @@ -552,69 +421,6 @@ def check_decay_with_place( ) -class TestLinearWamrupLearningRateDecayWithScalarInput(unittest.TestCase): - def run_scalar_lr(self, place, lr, start_lr, end_lr): - main_prog = base.Program() - startup_prog = base.Program() - - warmup_steps = 10 - - with base.program_guard(main_prog, startup_prog): - decayed_lr = paddle.optimizer.lr.linear_lr_warmup( - lr, warmup_steps, start_lr, end_lr - ) - - exe = base.Executor(place) - exe.run(startup_prog) - - for step in range(20): - (lr_val,) = exe.run(main_prog, feed={}, fetch_list=[decayed_lr]) - if step < warmup_steps: - expected_lr = linear_lr_warmup( - float(step), warmup_steps, start_lr, end_lr - ) - else: - expected_lr = lr - self.assertAlmostEqual( - expected_lr, - lr_val[0], - places=6, - msg=f'Test failed, step {step}, expected {expected_lr}, but got {lr_val[0]}', - ) - - def test_scalar_lr(self): - def run_places(lr, start_lr, end_lr): - places = [] - if ( - os.environ.get('FLAGS_CI_both_cpu_and_gpu', 'False').lower() - in ['1', 'true', 'on'] - or not core.is_compiled_with_cuda() - ): - places.append(base.CPUPlace()) - if core.is_compiled_with_cuda(): - 
places.append(base.CUDAPlace(0)) - for p in places: - self.run_scalar_lr(p, lr, start_lr, end_lr) - - # float - lr = 0.2 - start_lr = 0.1 / 3.0 - end_lr = 0.2 - run_places(lr, start_lr, end_lr) - - # int end_lr - lr = 2.0 - start_lr = 0.1 / 3.0 - end_lr = 1 - run_places(lr, start_lr, end_lr) - - # int - lr = 1 - start_lr = 0 - end_lr = 1 - run_places(lr, start_lr, end_lr) - - if __name__ == '__main__': paddle.enable_static() unittest.main() diff --git a/test/deprecated/prim/composite_ops/test_composite_layer_norm_deprecated.py b/test/deprecated/prim/composite_ops/test_composite_layer_norm_deprecated.py deleted file mode 100644 index d139e637fcb067..00000000000000 --- a/test/deprecated/prim/composite_ops/test_composite_layer_norm_deprecated.py +++ /dev/null @@ -1,338 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import numpy as np -from prim.composite_ops.utils import SUB_TOLERANCE - -import paddle -from paddle import _C_ops -from paddle.base import core, framework -from paddle.base.layer_helper import LayerHelper -from paddle.framework import in_dynamic_mode -from paddle.incubate.autograd import primapi -from paddle.nn import LayerNorm - - -def generate_data(shape1, shape2, shape3, dtype="float32"): - np.random.seed(200) - np_data1 = np.random.random(shape1).astype(dtype) - np_data2 = np.random.random(shape2).astype(dtype) - np_data3 = np.random.random(shape3).astype(dtype) - return np_data1, np_data2, np_data3 - - -def layer_norm_wrapper( - x, normalized_shape, weight=None, bias=None, epsilon=1e-05, name=None -): - input_shape = list(x.shape) - input_ndim = len(input_shape) - - normalized_ndim = len(normalized_shape) - begin_norm_axis = input_ndim - normalized_ndim - if ( - input_ndim < normalized_ndim - or input_shape[begin_norm_axis:] != normalized_shape - ): - str_normalized_shape = str(normalized_shape) - raise ValueError( - 'Given normalized_shape is ' - + str_normalized_shape - + ', expected input with shape [*, ' - + str_normalized_shape[1:] - + ', but got input shape ' - + str(input_shape) - ) - - if in_dynamic_mode(): - return _C_ops.layer_norm(x, weight, bias, epsilon, begin_norm_axis) - - else: - inputs = {} - inputs['X'] = [x] - if weight: - inputs['Scale'] = [weight] - if bias: - inputs['Bias'] = [bias] - attrs = {"epsilon": epsilon, "begin_norm_axis": begin_norm_axis} - - # create output - helper = LayerHelper('layer_norm', **locals()) - from paddle.base.data_feeder import convert_dtype - - param_dtype = ( - x.dtype if convert_dtype(x.dtype) != 'float16' else 'float32' - ) - mean_out = helper.create_variable_for_type_inference( - dtype=param_dtype, stop_gradient=True - ) - variance_out = helper.create_variable_for_type_inference( - dtype=param_dtype, stop_gradient=True - ) - layer_norm_out = helper.create_variable_for_type_inference(x.dtype) - - helper.append_op( - type="layer_norm", - inputs=inputs, - outputs={ - "Y": layer_norm_out, - "Mean": mean_out, - "Variance": variance_out, - 
}, - attrs={"epsilon": epsilon, "begin_norm_axis": begin_norm_axis}, - ) - - return layer_norm_out, mean_out, variance_out - - -class Attr: - def __init__(self) -> None: - self.dtype = None - self.n_shape = None - self.shape1 = None - self.shape2 = None - self.shape3 = None - - def set_dtype(self, dtype) -> None: - self.dtype = dtype - - def set_shape(self, n_shape, shape1=[], shape2=[], shape3=[]) -> None: - self.n_shape = n_shape - self.shape1 = shape1 - self.shape2 = shape2 - self.shape3 = shape3 - - def get_rtol(self, flag): - rtol = SUB_TOLERANCE[self.dtype][flag].get("rtol") - return rtol - - def get_atol(self, flag): - atol = SUB_TOLERANCE[self.dtype][flag].get("atol") - return atol - - -attrs = Attr() - - -def fn(x, norm_shape, w, b): - return layer_norm_wrapper(x, norm_shape, w, b) - - -def expect_forward(x, norm_shape, w, b): - return fn(x, norm_shape, w, b) - - -class TestCompositelayer_norm(unittest.TestCase): - def setUp(self): - self.dtypes = ["float32", "float64"] - self.n_shape = [[4], [64, 128], [64]] - self.shape1s = [[3, 4], [64, 64, 128], [128, 64, 64]] - self.shape2s = [[4], [64 * 128], [64]] - self.shape3s = [[4], [64 * 128], [64]] - - def cal_composite(self, inputs, norm_shape, weight, bias): - paddle.enable_static() - core._set_prim_forward_enabled(True) - startup_program = paddle.static.Program() - main_program = paddle.static.Program() - with paddle.static.program_guard(main_program, startup_program): - x = paddle.static.data( - 'x', shape=inputs.shape, dtype=str(inputs.dtype) - ) - w = paddle.static.data( - 'w', shape=weight.shape, dtype=str(weight.dtype) - ) - b = paddle.static.data('b', shape=bias.shape, dtype=str(bias.dtype)) - out, mean, var = fn(x, norm_shape, w, b) - - blocks = main_program.blocks - - fwd_ops = [op.type for op in blocks[0].ops] - # Ensure that layer_norm in original block - self.assertTrue('layer_norm' in fwd_ops) - - primapi.to_prim(blocks) - - fwd_ops_new = [op.type for op in blocks[0].ops] - # Ensure that layer_norm is split into small ops - self.assertTrue('layer_norm' not in fwd_ops_new) - - exe = paddle.static.Executor() - exe.run(startup_program) - res = exe.run( - main_program, - feed={ - 'x': inputs, - 'w': weight, - 'b': bias, - }, - fetch_list=[out, mean, var], - ) - paddle.disable_static() - core._set_prim_forward_enabled(False) - return res - - def cal2_composite(self, inputs, norm_shape, weight, bias): - paddle.enable_static() - core._set_prim_forward_enabled(True) - startup_program = paddle.static.Program() - main_program = paddle.static.Program() - with paddle.static.program_guard(main_program, startup_program): - x = paddle.static.data( - 'x', shape=inputs.shape, dtype=str(inputs.dtype) - ) - - out, mean, var = fn(x, norm_shape, weight, bias) - - blocks = main_program.blocks - - fwd_ops = [op.type for op in blocks[0].ops] - # Ensure that layer_norm in original block - self.assertTrue('layer_norm' in fwd_ops) - - primapi.to_prim(blocks) - - fwd_ops_new = [op.type for op in blocks[0].ops] - # Ensure that layer_norm is split into small ops - self.assertTrue('layer_norm' not in fwd_ops_new) - - exe = paddle.static.Executor() - exe.run(startup_program) - res = exe.run( - main_program, - feed={ - 'x': inputs, - }, - fetch_list=[out, mean, var], - ) - paddle.disable_static() - core._set_prim_forward_enabled(False) - return res - - def compare_forward(self): - x, w, b = generate_data( - attrs.shape1, attrs.shape2, attrs.shape3, attrs.dtype - ) - n_shape = attrs.n_shape - x_p = paddle.to_tensor(x) - w_p = paddle.to_tensor(w) - b_p = 
paddle.to_tensor(b) - - expect = expect_forward(x_p, n_shape, w_p, b_p) - actual, _a_mean, _a_var = self.cal_composite(x, n_shape, w, b) - - assert expect.numpy().dtype == actual.dtype - np.testing.assert_allclose( - expect.numpy(), - actual, - rtol=attrs.get_rtol("forward"), - atol=attrs.get_atol("forward"), - ) - - expect_2 = expect_forward(x_p, n_shape, None, None) - actual_2, _a_mean_2, _a_var_2 = self.cal2_composite( - x, n_shape, None, None - ) - assert expect_2.numpy().dtype == actual_2.dtype - np.testing.assert_allclose( - expect_2.numpy(), - actual_2, - rtol=attrs.get_rtol("forward"), - atol=attrs.get_atol("forward"), - ) - - def test_forward(self): - for j in self.dtypes: - if paddle.device.get_device() == "cpu" and j == "float16": - print("need pass this case") - continue - for t in range(0, len(self.shape1s)): - attrs.set_dtype(j) - attrs.set_shape( - self.n_shape[t], - self.shape1s[t], - self.shape2s[t], - self.shape3s[t], - ) - self.compare_forward() - - -def apply_to_static(net, use_cinn): - return paddle.jit.to_static(net, backend=None, full_graph=True) - - -class PrimeNet(paddle.nn.Layer): - def __init__(self, n_shape): - super().__init__() - self.ln = LayerNorm(n_shape) - - def forward(self, x): - out = self.ln(x) - return out - - -class TestPrimForwardAndBackward(unittest.TestCase): - """ - Test PrimeNet with @to_static + prim forward + prim backward + cinn v.s Dygraph - """ - - def setUp(self): - paddle.seed(2022) - self.n_shape = [[4], [64, 128], [64]] - self.shape1s = [[3, 4], [64, 64, 128], [128, 64, 64]] - - def train(self, use_prim): - self.x = paddle.randn(attrs.shape1, dtype="float32") - self.x.stop_gradient = False - core._set_prim_all_enabled(use_prim) - paddle.seed(2022) - net = PrimeNet(attrs.n_shape) - sgd = paddle.optimizer.SGD( - learning_rate=0.1, parameters=net.parameters() - ) - - net = paddle.amp.decorate(models=net, level='O2') - - net = apply_to_static(net, False) - with paddle.amp.auto_cast(level='O2'): - out = net(self.x) - loss = paddle.mean(out) - loss.backward() - sgd.step() - sgd.clear_grad() - return loss - - def compare_forward(self): - if not isinstance(framework._current_expected_place(), core.CPUPlace): - expected = self.train(False) - actual = self.train(True) - np.testing.assert_allclose( - expected, - actual, - rtol=1e-3, - atol=1e-3, - ) - - def test_forward(self): - for t in range(0, len(self.shape1s)): - attrs.set_shape( - self.n_shape[t], - self.shape1s[t], - ) - self.compare_forward() - - -if __name__ == '__main__': - unittest.main() diff --git a/test/deprecated/prim/composite_ops/test_composite_layer_norm_grad_deprecated.py b/test/deprecated/prim/composite_ops/test_composite_layer_norm_grad_deprecated.py deleted file mode 100644 index 8d894934a28af1..00000000000000 --- a/test/deprecated/prim/composite_ops/test_composite_layer_norm_grad_deprecated.py +++ /dev/null @@ -1,791 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import unittest -from functools import reduce -from operator import mul - -import numpy as np -from prim.composite_ops.utils import SUB_TOLERANCE - -import paddle -import paddle.nn.functional as F -from paddle.base import core -from paddle.incubate.autograd import primapi - -TOLERANCE_NUMPY = { - "float32": {"rtol": 2e-5, "atol": 2e-5}, - "float64": {"rtol": 1e-11, "atol": 1e-11}, -} - -TOLERANCE_COMP_GRAD = { - "float64": {"rtol": 1e-13, "atol": 1e-13}, - "float32": {"rtol": 1e-5, "atol": 1e-5}, - "float16": {"rtol": 1e-3, "atol": 1e-3}, # amp -} - - -def generate_data(shape1, shape2, shape3, dtype="float32"): - np.random.seed(12) - np_data1 = np.random.random(shape1).astype(dtype) - np_data2 = np.random.random(shape2).astype(dtype) - np_data3 = np.random.random(shape3).astype(dtype) - np_data4 = np.ones_like(np_data1).astype(dtype) - return np_data1, np_data2, np_data3, np_data4 - - -def _reference_layer_norm_naive( - x, scale, beta, epsilon=1e-5, begin_norm_axis=1 -): - x_shape = x.shape - N = reduce(mul, x_shape[0:begin_norm_axis], 1) - D = reduce(mul, x_shape[begin_norm_axis : len(x_shape)], 1) - x.shape = [N, D] - - mean = np.mean(x, axis=1) - difference = x - mean.reshape([N, 1]) - var_tmp1 = np.power(difference, 2.0) - variance = np.mean(var_tmp1, axis=1) - var = variance + epsilon - # var = np.var(x, axis=1) + epsilon - output = np.divide( - (x - mean.reshape([N, 1])), (np.sqrt(var)).reshape([N, 1]) - ) - if scale is not None: - output = scale.reshape([1, D]) * output - if beta is not None: - output = output + beta.reshape([1, D]) - - x.shape, output.shape = x_shape, x_shape - return output, mean, var - - -def _reference_layer_norm_grad( - x, grad_y, scale, bias, mean, var, begin_norm_axis=1 -): - x_shape = x.shape - N = reduce(mul, x_shape[0:begin_norm_axis], 1) - D = reduce(mul, x_shape[begin_norm_axis : len(x_shape)], 1) - - if scale is not None: - scale_shape = scale.shape - scale.shape = [1, D] - x.shape, grad_y.shape = [N, D], [N, D] - var.shape, mean.shape = [N, 1], [N, 1] - - # d_bias - if bias is not None: - d_bias = np.sum(grad_y, axis=0).reshape([1, D]) - else: - d_bias = None - # d_scale - if scale is not None: - d_scale = np.sum( - ((x - mean) * np.sqrt(1 / var)) * grad_y, axis=0 - ).reshape([1, D]) - else: - d_scale = None - # dx - if scale is not None: - dx_end = scale * np.sqrt(1.0 / var) * grad_y - d_mean_0 = np.sum(-np.sqrt(1.0 / var) * grad_y * scale, axis=1).reshape( - [N, 1] - ) # the second part equals to zero. - d_mean = 1.0 / D * d_mean_0 - d_std = np.sum( - -(1.0 / var) * (x - mean) * grad_y * scale, axis=1 - ).reshape([N, 1]) * ( - 1.0 / D * np.sqrt(1.0 / var).reshape([N, 1]) * (x - mean) - ) - else: - dx_end = 1.0 * np.sqrt(1.0 / var) * grad_y - d_mean_0 = np.sum(-np.sqrt(1.0 / var) * grad_y * 1.0, axis=1).reshape( - [N, 1] - ) # the second part equals to zero. 
- d_mean = 1.0 / D * d_mean_0 - d_std = np.sum( - -(1.0 / var) * (x - mean) * grad_y * 1.0, axis=1 - ).reshape([N, 1]) * ( - 1.0 / D * np.sqrt(1.0 / var).reshape([N, 1]) * (x - mean) - ) - - grad_x = dx_end + d_mean + d_std - - grad_x.shape, x.shape, grad_y.shape = x_shape, x_shape, x_shape - var.shape, mean.shape = [N], [N] - - if scale is not None: - scale.shape = scale_shape - - return grad_x, d_scale, d_bias - - -class Attr: - def __init__(self) -> None: - self.dtype = None - self.n_shape = None - self.shape1 = None - self.shape2 = None - self.shape3 = None - - def set_dtype(self, dtype) -> None: - self.dtype = dtype - - def set_shape(self, n_shape, shape1, shape2, shape3) -> None: - self.n_shape = n_shape - self.shape1 = shape1 - self.shape2 = shape2 - self.shape3 = shape3 - - def get_rtol(self, flag): - rtol = SUB_TOLERANCE[self.dtype][flag].get("rtol") - return rtol - - def get_atol(self, flag): - atol = SUB_TOLERANCE[self.dtype][flag].get("atol") - return atol - - -attrs = Attr() - - -def fn(x, norm_shape, w, b): - return F.layer_norm(x, norm_shape, w, b) - - -def dygraph_fused_backward_withNone(x, norm_shape, w, b, y_g): - paddle.disable_static() - x.stop_gradient = False - res = fn(x, norm_shape, w, b) - gradients = paddle.grad(res, x, y_g) - return gradients - - -def dygraph_fused_backward(x, norm_shape, w, b, y_g): - paddle.disable_static() - x.stop_gradient = False - w.stop_gradient = False - b.stop_gradient = False - res = fn(x, norm_shape, w, b) - gradients = paddle.grad(res, [x, w, b], y_g) - return gradients[0], gradients[1], gradients[2] - - -class TestCompositelayer_norm(unittest.TestCase): - def setUp(self): - self.dtypes = ["float32"] - self.n_shape = [[4], [64, 128], [64]] - self.shape1s = [[3, 4], [64, 64, 128], [128, 64, 64]] - self.shape2s = [[4], [64 * 128], [64]] - self.shape3s = [[4], [64 * 128], [64]] - - def static_comp_forward(self, inputs, norm_shape, weight, bias, y_g): - paddle.enable_static() - core._set_prim_forward_enabled(True) - startup_program = paddle.static.Program() - main_program = paddle.static.Program() - with paddle.static.program_guard(main_program, startup_program): - x = paddle.static.data( - 'x', shape=inputs.shape, dtype=str(inputs.dtype) - ) - x.stop_gradient = False - w = paddle.static.data( - 'w', shape=weight.shape, dtype=str(weight.dtype) - ) - w.stop_gradient = False - b = paddle.static.data('b', shape=bias.shape, dtype=str(bias.dtype)) - b.stop_gradient = False - - y = fn(x, norm_shape, w, b) - - y_grad = paddle.static.data( - 'y_grad', shape=y_g.shape, dtype=str(y_g.dtype) - ) - - blocks = main_program.blocks - - fwd_ops = [op.type for op in blocks[0].ops] - # Ensure that layer_norm in original block - self.assertTrue('layer_norm' in fwd_ops) - - primapi.to_prim(blocks) - - fwd_ops_new = [op.type for op in blocks[0].ops] - # Ensure that layer_norm is split into small ops - self.assertTrue('layer_norm' not in fwd_ops_new) - - z = paddle.static.gradients([y], [x, w, b], y_grad) - - fwd_ops_grad = [op.type for op in blocks[0].ops] - # Ensure that layer_norm_grad not in grad block - self.assertTrue('layer_norm_grad' not in fwd_ops_grad) - - exe = paddle.static.Executor() - exe.run(startup_program) - res = exe.run( - main_program, - feed={ - 'x': inputs, - 'w': weight, - 'b': bias, - 'y_grad': y_g, - }, - fetch_list=z, - ) - paddle.disable_static() - core._set_prim_forward_enabled(False) - return res - - def static_comp_forward_withNone( - self, inputs, norm_shape, weight, bias, y_g - ): - paddle.enable_static() - 
core._set_prim_forward_enabled(True) - startup_program = paddle.static.Program() - main_program = paddle.static.Program() - with paddle.static.program_guard(main_program, startup_program): - x = paddle.static.data( - 'x', shape=inputs.shape, dtype=str(inputs.dtype) - ) - y_grad = paddle.static.data( - 'y_grad', shape=y_g.shape, dtype=str(y_g.dtype) - ) - x.stop_gradient = False - y = fn(x, norm_shape, weight, bias) - - blocks = main_program.blocks - - fwd_ops = [op.type for op in blocks[0].ops] - # Ensure that layer_norm in original block - self.assertTrue('layer_norm' in fwd_ops) - - primapi.to_prim(blocks) - - fwd_ops_new = [op.type for op in blocks[0].ops] - # Ensure that layer_norm is split into small ops - self.assertTrue('layer_norm' not in fwd_ops_new) - - z = paddle.static.gradients([y], x, y_grad) - fwd_ops_grad = [op.type for op in blocks[0].ops] - # Ensure that layer_norm_grad not in grad block - self.assertTrue('layer_norm_grad' not in fwd_ops_grad) - - exe = paddle.static.Executor() - exe.run(startup_program) - res = exe.run( - main_program, - feed={ - 'x': inputs, - 'y_grad': y_g, - }, - fetch_list=z, - ) - paddle.disable_static() - core._set_prim_forward_enabled(False) - return res - - # to_pirm after gradient can call comp_layer_norm_grad - def static_comp_forward_and_backward( - self, inputs, norm_shape, weight, bias, y_g - ): - paddle.enable_static() - core._set_prim_all_enabled(True) - startup_program = paddle.static.Program() - main_program = paddle.static.Program() - with paddle.static.program_guard(main_program, startup_program): - x = paddle.static.data( - 'x', shape=inputs.shape, dtype=str(inputs.dtype) - ) - x.stop_gradient = False - w = paddle.static.data( - 'w', shape=weight.shape, dtype=str(weight.dtype) - ) - w.stop_gradient = False - b = paddle.static.data('b', shape=bias.shape, dtype=str(bias.dtype)) - b.stop_gradient = False - - y_grad = paddle.static.data( - 'y_grad', shape=y_g.shape, dtype=str(y_g.dtype) - ) - - y = fn(x, norm_shape, w, b) - - blocks = main_program.blocks - - fwd_ops = [op.type for op in blocks[0].ops] - # Ensure that layer_norm in original block - self.assertTrue('layer_norm' in fwd_ops) - - z = paddle.static.gradients([y], [x, w, b], y_grad) - - primapi.to_prim(blocks) - - exe = paddle.static.Executor() - exe.run(startup_program) - res = exe.run( - main_program, - feed={ - 'x': inputs, - 'w': weight, - 'b': bias, - 'y_grad': y_g, - }, - fetch_list=z, - ) - paddle.disable_static() - core._set_prim_all_enabled(False) - return res - - def static_comp_forward_and_backward_withNone( - self, inputs, norm_shape, weight, bias, y_g - ): - paddle.enable_static() - core._set_prim_all_enabled(True) - startup_program = paddle.static.Program() - main_program = paddle.static.Program() - with paddle.static.program_guard(main_program, startup_program): - x = paddle.static.data( - 'x', shape=inputs.shape, dtype=str(inputs.dtype) - ) - x.stop_gradient = False - - y_grad = paddle.static.data( - 'y_grad', shape=y_g.shape, dtype=str(y_g.dtype) - ) - - y = fn(x, norm_shape, weight, bias) - - blocks = main_program.blocks - - fwd_ops = [op.type for op in blocks[0].ops] - # Ensure that layer_norm in original block - self.assertTrue('layer_norm' in fwd_ops) - - z = paddle.static.gradients([y], [x], y_grad) - - primapi.to_prim(blocks) - - exe = paddle.static.Executor() - exe.run(startup_program) - res = exe.run( - main_program, - feed={ - 'x': inputs, - 'y_grad': y_g, - }, - fetch_list=z, - ) - paddle.disable_static() - core._set_prim_all_enabled(False) - return 
res - - def compare_comp_forward(self): - x, w, b, y_g = generate_data( - attrs.shape1, attrs.shape2, attrs.shape3, attrs.dtype - ) - n_shape = attrs.n_shape - x_p = paddle.to_tensor(x) - w_p = paddle.to_tensor(w) - b_p = paddle.to_tensor(b) - y_g_p = paddle.to_tensor(y_g) - - expect = dygraph_fused_backward(x_p, n_shape, w_p, b_p, y_g_p) - actual_fwd = self.static_comp_forward(x, n_shape, w, b, y_g) - actual_all = self.static_comp_forward_and_backward( - x, n_shape, w, b, y_g - ) - - assert expect[0].numpy().dtype == actual_fwd[0].dtype - np.testing.assert_allclose( - expect[0].numpy(), - actual_fwd[0], - rtol=attrs.get_rtol("backward"), - atol=attrs.get_atol("backward"), - ) - - np.testing.assert_allclose( - actual_fwd[0], - actual_all[0], - rtol=TOLERANCE_COMP_GRAD[attrs.dtype]['rtol'], - atol=TOLERANCE_COMP_GRAD[attrs.dtype]['atol'], - ) - - def compare_comp_forward_withNone(self): - x, w, b, y_g = generate_data( - attrs.shape1, attrs.shape2, attrs.shape3, attrs.dtype - ) - n_shape = attrs.n_shape - x_p = paddle.to_tensor(x) - w_p = paddle.to_tensor(w) - b_p = paddle.to_tensor(b) - y_g_p = paddle.to_tensor(y_g) - - expect_2 = dygraph_fused_backward_withNone( - x_p, n_shape, None, None, y_g_p - )[0].numpy() - actual_2 = self.static_comp_forward_withNone( - x, n_shape, None, None, y_g - )[0] - actual_all_2 = self.static_comp_forward_and_backward_withNone( - x, n_shape, None, None, y_g - )[0] - - assert expect_2.dtype == actual_2.dtype - np.testing.assert_allclose( - expect_2, - actual_2, - rtol=attrs.get_rtol("backward"), - atol=attrs.get_atol("backward"), - ) - - np.testing.assert_allclose( - expect_2, - actual_all_2, - rtol=TOLERANCE_COMP_GRAD[attrs.dtype]['rtol'], - atol=TOLERANCE_COMP_GRAD[attrs.dtype]['atol'], - ) - - def test_backward(self): - for j in self.dtypes: - if paddle.device.get_device() == "cpu": - print("need pass this case") - continue - for t in range(0, len(self.shape1s)): - attrs.set_dtype(j) - attrs.set_shape( - self.n_shape[t], - self.shape1s[t], - self.shape2s[t], - self.shape3s[t], - ) - self.compare_comp_forward() - - def test_backward_withNone(self): - for t in range(0, len(self.shape1s)): - if paddle.device.get_device() == "cpu": - print("need pass this case") - continue - attrs.set_dtype("float32") - attrs.set_shape( - self.n_shape[t], - self.shape1s[t], - self.shape2s[t], - self.shape3s[t], - ) - self.compare_comp_forward_withNone() - - -class TestCompositelayer_normPrimBackward(unittest.TestCase): - def setUp(self): - core._set_prim_backward_enabled(True) - self.dtypes = ["float32"] - self.n_shape = [[4], [64, 128], [64]] - self.shape1s = [[3, 4], [64, 64, 128], [128, 64, 64]] - self.shape2s = [[4], [64 * 128], [64]] - self.shape3s = [[4], [64 * 128], [64]] - - def static_comp_forward_and_backward( - self, inputs, norm_shape, weight, bias - ): - paddle.enable_static() - core._set_prim_all_enabled(True) - startup_program = paddle.static.Program() - main_program = paddle.static.Program() - with paddle.static.program_guard(main_program, startup_program): - x = paddle.static.data( - 'x', shape=inputs.shape, dtype=str(inputs.dtype) - ) - x.stop_gradient = False - w = paddle.static.data( - 'w', shape=weight.shape, dtype=str(weight.dtype) - ) - b = paddle.static.data('b', shape=bias.shape, dtype=str(bias.dtype)) - y = fn(x, norm_shape, w, b) - - blocks = main_program.blocks - primapi.to_prim(blocks) - z = paddle.static.gradients([y], x) - - exe = paddle.static.Executor() - exe.run(startup_program) - res = exe.run( - main_program, - feed={ - 'x': inputs, - 'w': 
weight, - 'b': bias, - }, - fetch_list=[z], - ) - paddle.disable_static() - core._set_prim_all_enabled(False) - return res - - def static_comp_forward_and_backward_withNone( - self, inputs, norm_shape, weight, bias - ): - paddle.enable_static() - core._set_prim_all_enabled(True) - startup_program = paddle.static.Program() - main_program = paddle.static.Program() - with paddle.static.program_guard(main_program, startup_program): - x = paddle.static.data( - 'x', shape=inputs.shape, dtype=str(inputs.dtype) - ) - x.stop_gradient = False - y = fn(x, norm_shape, weight, bias) - - blocks = main_program.blocks - primapi.to_prim(blocks) - z = paddle.static.gradients([y], x) - - exe = paddle.static.Executor() - exe.run(startup_program) - res = exe.run( - main_program, - feed={ - 'x': inputs, - }, - fetch_list=[z], - ) - paddle.disable_static() - core._set_prim_all_enabled(False) - return res - - def compare_backward(self): - x, w, b, y_g = generate_data( - attrs.shape1, attrs.shape2, attrs.shape3, attrs.dtype - ) - n_shape = attrs.n_shape - x_p = paddle.to_tensor(x) - w_p = paddle.to_tensor(w) - b_p = paddle.to_tensor(b) - y_g_p = paddle.to_tensor(y_g) - - expect = dygraph_fused_backward(x_p, n_shape, w_p, b_p, y_g_p)[ - 0 - ].numpy() - actual = self.static_comp_forward_and_backward(x, n_shape, w, b)[0] - - assert expect.dtype == actual.dtype - np.testing.assert_allclose( - expect, - actual, - rtol=attrs.get_rtol("prim_backward"), - atol=attrs.get_rtol("prim_backward"), - ) - - expect_2 = dygraph_fused_backward_withNone( - x_p, n_shape, None, None, y_g_p - )[0].numpy() - actual_2 = self.static_comp_forward_and_backward_withNone( - x, n_shape, None, None - )[0] - assert expect_2.dtype == actual_2.dtype - np.testing.assert_allclose( - expect_2, - actual_2, - rtol=attrs.get_rtol("prim_backward"), - atol=attrs.get_atol("prim_backward"), - ) - - def test_prim_backward(self): - for j in self.dtypes: - if paddle.device.get_device() == "cpu": - print("need pass this case") - continue - for t in range(0, len(self.shape1s)): - attrs.set_dtype(j) - attrs.set_shape( - self.n_shape[t], - self.shape1s[t], - self.shape2s[t], - self.shape3s[t], - ) - self.compare_backward() - - -class TestCompositeNumpylayer_norm(unittest.TestCase): - def setUp(self): - self.dtypes = ["float32", "float64"] - self.n_shape = [ - [4], - [64, 128], - ] - self.shape1s = [ - [3, 4], - [64, 64, 128], - ] - self.shape2s = [ - [4], - [64 * 128], - ] - self.shape3s = [ - [4], - [64 * 128], - ] - - def static_comp_forward(self, inputs, norm_shape, weight, bias, y_grad): - paddle.enable_static() - core._set_prim_forward_enabled(True) - startup_program = paddle.static.Program() - main_program = paddle.static.Program() - with paddle.static.program_guard(main_program, startup_program): - x = paddle.static.data( - 'x', shape=inputs.shape, dtype=str(inputs.dtype) - ) - x.stop_gradient = False - w = paddle.static.data( - 'w', shape=weight.shape, dtype=str(weight.dtype) - ) - b = paddle.static.data('b', shape=bias.shape, dtype=str(bias.dtype)) - y = fn(x, norm_shape, w, b) - y_g = paddle.static.data( - 'y_g', shape=y_grad.shape, dtype=str(y_grad.dtype) - ) - blocks = main_program.blocks - - fwd_ops = [op.type for op in blocks[0].ops] - # Ensure that layer_norm in original block - self.assertTrue('layer_norm' in fwd_ops) - - primapi.to_prim(blocks) - - fwd_ops_new = [op.type for op in blocks[0].ops] - # Ensure that layer_norm is split into small ops - self.assertTrue('layer_norm' not in fwd_ops_new) - - z = paddle.static.gradients([y], x, y_g) - 
fwd_ops_grad = [op.type for op in blocks[0].ops] - # Ensure that layer_norm_grad not in grad block - - self.assertTrue('layer_norm_grad' not in fwd_ops_grad) - - exe = paddle.static.Executor() - exe.run(startup_program) - res = exe.run( - main_program, - feed={ - 'x': inputs, - 'w': weight, - 'b': bias, - 'y_g': y_grad, - }, - fetch_list=[y, z[0]], - ) - paddle.disable_static() - core._set_prim_forward_enabled(False) - return res[0], res[1] - - def static_comp_forward_prim( - self, inputs, norm_shape, weight, bias, y_grad - ): - paddle.enable_static() - core._set_prim_all_enabled(True) - startup_program = paddle.static.Program() - main_program = paddle.static.Program() - with paddle.static.program_guard(main_program, startup_program): - x = paddle.static.data( - 'x', shape=inputs.shape, dtype=str(inputs.dtype) - ) - x.stop_gradient = False - w = paddle.static.data( - 'w', shape=weight.shape, dtype=str(weight.dtype) - ) - b = paddle.static.data('b', shape=bias.shape, dtype=str(bias.dtype)) - y = fn(x, norm_shape, w, b) - y_g = paddle.static.data( - 'y_g', shape=y_grad.shape, dtype=str(y_grad.dtype) - ) - - blocks = main_program.blocks - primapi.to_prim(blocks) - z = paddle.static.gradients([y], x) - - exe = paddle.static.Executor() - exe.run(startup_program) - res = exe.run( - main_program, - feed={'x': inputs, 'w': weight, 'b': bias, 'y_g': y_grad}, - fetch_list=[y, z[0]], - ) - paddle.disable_static() - core._set_prim_all_enabled(False) - return res[0], res[1] - - def compare_backward(self): - x, w, b, y_grad = generate_data( - attrs.shape1, attrs.shape2, attrs.shape3, attrs.dtype - ) - - n_shape = attrs.n_shape - - composite1, composite2 = self.static_comp_forward( - x, n_shape, w, b, y_grad - ) - composite_p1, composite_p2 = self.static_comp_forward_prim( - x, n_shape, w, b, y_grad - ) - - numpy1, mean, variance = _reference_layer_norm_naive( - x, - w, - b, - ) - numpy2, _, _ = _reference_layer_norm_grad( - x, - y_grad, - w, - b, - mean, - variance, - ) - - # forward_prim - np.testing.assert_allclose( - composite1, - numpy1, - rtol=TOLERANCE_NUMPY[attrs.dtype]['rtol'], - atol=TOLERANCE_NUMPY[attrs.dtype]['atol'], - ) - # forward_prim + backward - np.testing.assert_allclose( - composite2, - numpy2, - rtol=TOLERANCE_NUMPY[attrs.dtype]['rtol'], - atol=TOLERANCE_NUMPY[attrs.dtype]['atol'], - ) - # forward_prim + backward_prim - np.testing.assert_allclose( - composite_p2, - numpy2, - rtol=TOLERANCE_NUMPY[attrs.dtype]['rtol'], - atol=TOLERANCE_NUMPY[attrs.dtype]['atol'], - ) - - def test_backward(self): - for j in self.dtypes: - for t in range(0, len(self.shape1s)): - attrs.set_dtype(j) - attrs.set_shape( - self.n_shape[t], - self.shape1s[t], - self.shape2s[t], - self.shape3s[t], - ) - self.compare_backward() - - -if __name__ == '__main__': - unittest.main() diff --git a/test/legacy_test/test_activation_op.py b/test/legacy_test/test_activation_op.py index d62dcb23fff004..7a13bbd0358ef6 100644 --- a/test/legacy_test/test_activation_op.py +++ b/test/legacy_test/test_activation_op.py @@ -146,7 +146,7 @@ def test_check_grad(self): self.check_grad( ['X'], 'Out', - check_prim=True, + check_prim=False, check_pir=True, check_prim_pir=True, check_pir_onednn=self.check_pir_onednn, @@ -444,7 +444,7 @@ def test_check_grad(self): ['X'], 'Out', max_relative_error=0.01, - check_prim=True, + check_prim=False, check_pir=True, check_prim_pir=True, check_pir_onednn=self.check_pir_onednn, @@ -531,7 +531,7 @@ def test_check_output(self): place = core.CUDAPlace(0) self.check_output_with_place( place, - 
check_prim=True, + check_prim=False, check_pir=True, check_prim_pir=True, check_pir_onednn=self.check_pir_onednn, @@ -544,7 +544,7 @@ def test_check_grad(self): place, ['X'], 'Out', - check_prim=True, + check_prim=False, check_pir=True, check_prim_pir=True, ) @@ -596,7 +596,7 @@ def test_check_output(self): ) else: self.check_output( - check_prim=True, + check_prim=False, check_pir=True, check_prim_pir=True, check_pir_onednn=self.check_pir_onednn, @@ -604,7 +604,7 @@ ) def test_check_grad(self): # TODO(BeingGod): set `check_prim=True` when `fill_constant` supports `complex` dtype if self.dtype == np.complex64 or self.dtype == np.complex128: self.check_grad( ['X'], @@ -616,7 +616,7 @@ self.check_grad( ['X'], 'Out', - check_prim=True, + check_prim=False, check_pir=True, check_prim_pir=True, check_pir_onednn=self.check_pir_onednn, @@ -831,7 +831,7 @@ def test_check_grad(self): self.check_grad( ['X'], 'Out', - check_prim=True, + check_prim=False, check_pir=True, check_prim_pir=True, check_pir_onednn=self.check_pir_onednn, @@ -1683,7 +1683,7 @@ def test_check_grad(self): self.check_grad( ['X'], 'Out', - check_prim=True, + check_prim=False, check_pir=True, check_prim_pir=True, check_pir_onednn=self.check_pir_onednn, @@ -1696,7 +1696,7 @@ def test_check_output(self): self.check_output( - check_prim=True, + check_prim=False, check_pir=True, check_prim_pir=True, check_pir_onednn=self.check_pir_onednn, @@ -1726,7 +1726,7 @@ def test_check_grad(self): self.check_grad( ['X'], 'Out', - check_prim=True, + check_prim=False, check_pir=True, check_prim_pir=True, check_pir_onednn=self.check_pir_onednn, @@ -1811,7 +1811,7 @@ def test_check_grad(self): place, ['X'], 'Out', - check_prim=True, + check_prim=False, check_pir=True, check_prim_pir=True, ) @@ -1845,7 +1845,7 @@ def test_check_grad(self): ['X'], 'Out', check_dygraph=True, - check_prim=True, + check_prim=False, check_pir=True, check_prim_pir=True, check_pir_onednn=self.check_pir_onednn, @@ -1854,7 +1854,7 @@ def test_check_output(self): self.check_output( check_dygraph=True, - check_prim=True, + check_prim=False, check_pir=True, check_prim_pir=True, check_pir_onednn=self.check_pir_onednn, @@ -1888,7 +1888,7 @@ def test_check_grad(self): ['X'], 'Out', check_dygraph=True, - check_prim=True, + check_prim=False, check_pir=True, check_prim_pir=True, check_pir_onednn=self.check_pir_onednn, @@ -1897,7 +1897,7 @@ def test_check_output(self): self.check_output( check_dygraph=True, - check_prim=True, + check_prim=False, check_pir=True, check_prim_pir=True, check_pir_onednn=self.check_pir_onednn, @@ -1951,7 +1951,7 @@ def test_check_grad(self): ['X'], 'Out', max_relative_error=0.0005, - check_prim=True, + check_prim=False, check_pir=True, check_prim_pir=True, check_pir_onednn=self.check_pir_onednn, @@ -2008,7 +2008,7 @@ def test_check_grad(self): self.check_grad( ['X'], 'Out', - check_prim=True, + check_prim=False, check_pir=True, check_prim_pir=True, check_pir_onednn=self.check_pir_onednn, @@ -2147,8 +2147,8 @@ def test_check_grad_for_prim(self): paddle.CUDAPlace(0), ['X'], 'Out', - check_prim=True, - only_check_prim=True, + check_prim=False, + only_check_prim=False, check_pir=True, check_prim_pir=True, ) @@ -2234,7 +2234,7 @@ def test_check_grad(self): self.check_grad( ['X'], 'Out', - check_prim=True, + check_prim=False, check_pir=True, 
check_prim_pir=True, check_pir_onednn=self.check_pir_onednn, @@ -2462,7 +2462,7 @@ def test_check_grad(self): self.check_grad( ['X'], 'Out', - check_prim=True, + check_prim=False, check_pir=True, check_prim_pir=True, check_pir_onednn=self.check_pir_onednn, @@ -2817,7 +2817,7 @@ def test_check_grad(self): self.check_grad( ['X'], 'Out', - check_prim=True, + check_prim=False, check_pir=True, check_prim_pir=True, check_pir_onednn=self.check_pir_onednn, @@ -2825,7 +2825,7 @@ def test_check_grad(self): def test_check_output(self): self.check_output( - check_prim=True, + check_prim=False, check_pir=True, check_prim_pir=True, check_pir_onednn=self.check_pir_onednn, @@ -2962,7 +2962,7 @@ def if_enable_cinn(self): def test_check_output(self): self.check_output( - check_prim=True, + check_prim=False, check_pir=True, check_prim_pir=True, check_pir_onednn=self.check_pir_onednn, @@ -2975,7 +2975,7 @@ def test_check_grad(self): self.check_grad( ['X'], 'Out', - check_prim=True, + check_prim=False, check_pir=True, check_prim_pir=True, check_pir_onednn=self.check_pir_onednn, @@ -3103,7 +3103,7 @@ def setUp(self): def test_check_output(self): self.check_output( - check_prim=True, + check_prim=False, check_pir=True, check_prim_pir=False, check_pir_onednn=self.check_pir_onednn, @@ -3116,7 +3116,7 @@ def test_check_grad(self): self.check_grad( ['X'], 'Out', - check_prim=True, + check_prim=False, check_pir=True, check_prim_pir=True, check_pir_onednn=self.check_pir_onednn, @@ -3156,7 +3156,7 @@ def if_enable_cinn(self): def test_check_output(self): self.check_output( - check_prim=True, + check_prim=False, check_pir=True, check_prim_pir=True, check_pir_onednn=self.check_pir_onednn, @@ -3169,7 +3169,7 @@ def test_check_grad(self): self.check_grad( ['X'], 'Out', - check_prim=True, + check_prim=False, check_pir=True, check_prim_pir=True, check_pir_onednn=self.check_pir_onednn, @@ -4001,7 +4001,7 @@ def test_check_grad(self): self.check_grad( ['X'], 'Out', - check_prim=True, + check_prim=False, check_pir=True, check_prim_pir=True, check_pir_onednn=self.check_pir_onednn, @@ -4679,7 +4679,7 @@ def if_enable_cinn(self): def test_check_output(self): self.check_output( - check_prim=True, + check_prim=False, check_prim_pir=True, check_pir=True, check_pir_onednn=self.check_pir_onednn, @@ -4692,7 +4692,7 @@ def test_check_grad(self): self.check_grad( ['X'], 'Out', - check_prim=True, + check_prim=False, check_prim_pir=True, check_pir=True, check_pir_onednn=self.check_pir_onednn, @@ -5725,43 +5725,43 @@ def test_check_grad(self): create_test_act_fp16_class(TestActivation) create_test_act_fp16_class( - TestExpFp32_Prim, check_prim=True, enable_cinn=True, check_prim_pir=True + TestExpFp32_Prim, check_prim=False, enable_cinn=True, check_prim_pir=True ) create_test_act_fp16_class(TestExpm1, check_prim_pir=True) create_test_act_fp16_class( TestSigmoid, - check_prim=True, + check_prim=False, enable_cinn=True, check_pir=True, check_prim_pir=True, ) create_test_act_fp16_class( - TestSilu, check_prim=True, enable_cinn=True, check_prim_pir=True + TestSilu, check_prim=False, enable_cinn=True, check_prim_pir=True ) create_test_act_fp16_class(TestLogSigmoid, check_pir=True) create_test_act_fp16_class( - TestTanh, check_prim=True, check_prim_pir=True, enable_cinn=True + TestTanh, check_prim=False, check_prim_pir=True, enable_cinn=True ) create_test_act_fp16_class(TestTanhshrink, check_pir=True) create_test_act_fp16_class(TestHardShrink, check_pir=True) create_test_act_fp16_class(TestSoftshrink, check_pir=True) create_test_act_fp16_class( 
TestSqrt, - check_prim=True, + check_prim=False, enable_cinn=True, check_pir=True, check_prim_pir=True, ) create_test_act_fp16_class( TestSqrtComp, - check_prim=True, + check_prim=False, enable_cinn=True, check_pir=True, check_prim_pir=True, ) create_test_act_fp16_class( TestAbs, - check_prim=True, + check_prim=False, enable_cinn=True, check_pir=True, check_prim_pir=True, @@ -5774,7 +5774,7 @@ def test_check_grad(self): ) create_test_act_fp16_class( TestFloor, - check_prim=True, + check_prim=False, grad_check=False, enable_cinn=True, check_pir=True, @@ -5794,14 +5794,14 @@ def test_check_grad(self): create_test_act_fp16_class(TestRound, grad_check=False, check_pir=True) create_test_act_fp16_class( TestRelu, - check_prim=True, + check_prim=False, enable_cinn=True, check_pir=True, check_prim_pir=True, ) create_test_act_fp16_class( TestGelu, - check_prim=True, + check_prim=False, check_prim_pir=True, check_pir=True, enable_cinn=True, @@ -5816,12 +5816,12 @@ def test_check_grad(self): create_test_act_fp16_class(TestELU, check_pir=True, check_prim_pir=True) create_test_act_fp16_class(TestCELU, check_pir=True) create_test_act_fp16_class(TestReciprocal, check_pir=True) -create_test_act_fp16_class(TestLog, check_prim=True, check_pir=True) +create_test_act_fp16_class(TestLog, check_prim=False, check_pir=True) create_test_act_fp16_class(TestLog2, check_pir=True) create_test_act_fp16_class(TestLog10, check_pir=True) create_test_act_fp16_class(TestLog1p, check_pir=True) create_test_act_fp16_class(TestSquare, check_pir=True, check_prim_pir=True) -create_test_act_fp16_class(TestPow, check_prim=True, check_prim_pir=True) +create_test_act_fp16_class(TestPow, check_prim=False, check_prim_pir=True) create_test_act_fp16_class(TestPow_API) create_test_act_fp16_class(TestSTanh) create_test_act_fp16_class(TestSoftplus, check_pir=True) @@ -5830,31 +5830,31 @@ def test_check_grad(self): create_test_act_fp16_class(TestHardSigmoid, check_pir=True) create_test_act_fp16_class(TestSwish) create_test_act_fp16_class( - TestHardSwish, check_prim=True, check_pir=True, check_prim_pir=True + TestHardSwish, check_prim=False, check_pir=True, check_prim_pir=True ) create_test_act_fp16_class(TestMish, check_pir=True) create_test_act_fp16_class( TestLeakyRelu, - check_prim=True, + check_prim=False, enable_cinn=True, check_pir=True, check_prim_pir=True, ) create_test_act_fp16_class( - TestLeakyReluAlpha1, check_prim=True, enable_cinn=True, check_prim_pir=True + TestLeakyReluAlpha1, check_prim=False, enable_cinn=True, check_prim_pir=True ) create_test_act_fp16_class( - TestLeakyReluAlpha2, check_prim=True, enable_cinn=True, check_prim_pir=True + TestLeakyReluAlpha2, check_prim=False, enable_cinn=True, check_prim_pir=True ) create_test_act_fp16_class( - TestLeakyReluAlpha3, check_prim=True, enable_cinn=True, check_prim_pir=True + TestLeakyReluAlpha3, check_prim=False, enable_cinn=True, check_prim_pir=True ) create_test_act_fp16_class( - TestLeakyRelu_ZeroDim, check_prim=True, check_prim_pir=True + TestLeakyRelu_ZeroDim, check_prim=False, check_prim_pir=True ) create_test_act_fp16_class( TestRsqrt, - check_prim=True, + check_prim=False, enable_cinn=True, check_pir=True, check_prim_pir=True, @@ -5926,26 +5926,26 @@ def test_check_grad(self): create_test_act_bf16_class(TestActivation) create_test_act_bf16_class( - TestExpFp32_Prim, check_prim=True, check_prim_pir=True + TestExpFp32_Prim, check_prim=False, check_prim_pir=True ) create_test_act_bf16_class(TestExpm1, check_prim_pir=True) create_test_act_bf16_class( - TestSigmoid, 
check_prim=True, check_pir=True, check_prim_pir=True + TestSigmoid, check_prim=False, check_pir=True, check_prim_pir=True ) -create_test_act_bf16_class(TestSilu, check_prim=True, check_prim_pir=True) +create_test_act_bf16_class(TestSilu, check_prim=False, check_prim_pir=True) create_test_act_bf16_class(TestLogSigmoid, check_pir=True) -create_test_act_bf16_class(TestTanh, check_prim=True, check_prim_pir=True) +create_test_act_bf16_class(TestTanh, check_prim=False, check_prim_pir=True) create_test_act_bf16_class(TestTanhshrink, check_pir=True) create_test_act_bf16_class(TestHardShrink, check_pir=True) create_test_act_bf16_class(TestSoftshrink, check_pir=True) create_test_act_bf16_class( - TestSqrt, check_prim=True, check_pir=True, check_prim_pir=True + TestSqrt, check_prim=False, check_pir=True, check_prim_pir=True ) create_test_act_bf16_class( - TestSqrtComp, check_prim=True, check_pir=True, check_prim_pir=True + TestSqrtComp, check_prim=False, check_pir=True, check_prim_pir=True ) create_test_act_bf16_class( - TestAbs, check_prim=True, check_pir=True, check_prim_pir=True + TestAbs, check_prim=False, check_pir=True, check_prim_pir=True ) create_test_act_bf16_class( TestCeil, @@ -5956,7 +5956,7 @@ def test_check_grad(self): create_test_act_bf16_class( TestFloor, grad_check=False, - check_prim=True, + check_prim=False, check_pir=True, check_prim_pir=True, ) @@ -5973,11 +5973,11 @@ def test_check_grad(self): create_test_act_bf16_class(TestAtanh, check_pir=True) create_test_act_bf16_class(TestRound, grad_check=False, check_pir=True) create_test_act_bf16_class( - TestRelu, check_prim=True, check_pir=True, check_prim_pir=True + TestRelu, check_prim=False, check_pir=True, check_prim_pir=True ) create_test_act_bf16_class( TestGelu, - check_prim=True, + check_prim=False, check_pir=True, rev_comp_rtol=1e-2, rev_comp_atol=1e-2, @@ -5990,12 +5990,12 @@ def test_check_grad(self): create_test_act_bf16_class(TestELU, check_pir=True, check_prim_pir=True) create_test_act_bf16_class(TestCELU, check_pir=True) create_test_act_bf16_class(TestReciprocal, check_pir=True) -create_test_act_bf16_class(TestLog, check_prim=True, check_pir=True) +create_test_act_bf16_class(TestLog, check_prim=False, check_pir=True) create_test_act_bf16_class(TestLog2, check_pir=True) create_test_act_bf16_class(TestLog10, check_pir=True) create_test_act_bf16_class(TestLog1p, check_pir=True) create_test_act_bf16_class(TestSquare, check_pir=True, check_prim_pir=True) -create_test_act_bf16_class(TestPow, check_prim=True) +create_test_act_bf16_class(TestPow, check_prim=False) create_test_act_bf16_class(TestPow_API) create_test_act_bf16_class(TestSTanh) create_test_act_bf16_class(TestSoftplus, check_pir=True) @@ -6004,26 +6004,26 @@ def test_check_grad(self): create_test_act_bf16_class(TestHardSigmoid, check_pir=True) create_test_act_bf16_class(TestSwish) create_test_act_bf16_class( - TestHardSwish, check_prim=True, check_pir=True, check_prim_pir=True + TestHardSwish, check_prim=False, check_pir=True, check_prim_pir=True ) create_test_act_bf16_class(TestMish, check_pir=True) create_test_act_bf16_class( - TestLeakyRelu, check_prim=True, check_pir=True, check_prim_pir=True + TestLeakyRelu, check_prim=False, check_pir=True, check_prim_pir=True ) create_test_act_bf16_class( - TestLeakyReluAlpha1, check_prim=True, check_prim_pir=True + TestLeakyReluAlpha1, check_prim=False, check_prim_pir=True ) create_test_act_bf16_class( - TestLeakyReluAlpha2, check_prim=True, check_prim_pir=True + TestLeakyReluAlpha2, check_prim=False, check_prim_pir=True ) 
create_test_act_bf16_class( - TestLeakyReluAlpha3, check_prim=True, check_prim_pir=True + TestLeakyReluAlpha3, check_prim=False, check_prim_pir=True ) create_test_act_bf16_class( - TestLeakyRelu_ZeroDim, check_prim=True, check_prim_pir=True + TestLeakyRelu_ZeroDim, check_prim=False, check_prim_pir=True ) create_test_act_bf16_class( - TestRsqrt, check_prim=True, check_pir=True, check_prim_pir=True + TestRsqrt, check_prim=False, check_pir=True, check_prim_pir=True ) diff --git a/test/legacy_test/test_complex_op.py b/test/legacy_test/test_complex_op.py index d0df015677f6b0..aedb3f4b0254e3 100644 --- a/test/legacy_test/test_complex_op.py +++ b/test/legacy_test/test_complex_op.py @@ -260,5 +260,43 @@ def run_complex(test_type): np.testing.assert_equal(z4, None) +class TestComplexOut(unittest.TestCase): + def setUp(self): + paddle.disable_static() + self.shape = [3, 4] + self.real_np = np.random.rand(*self.shape).astype(np.float32) + self.imag_np = np.random.rand(*self.shape).astype(np.float32) + self.test_types = ["out"] + + def do_test(self, test_type): + real = paddle.to_tensor(self.real_np, stop_gradient=False) + imag = paddle.to_tensor(self.imag_np, stop_gradient=False) + + if test_type == 'raw': + result = paddle.complex(real, imag) + result.real().mean().backward() + return result, real.grad, imag.grad + elif test_type == 'out': + out = paddle.empty(self.shape, dtype='complex64') + out.stop_gradient = False + paddle.complex(real, imag, out=out) + out.real().mean().backward() + return out, real.grad, imag.grad + else: + raise ValueError(f"Unknown test type: {test_type}") + + def test_out(self): + out_std, real_grad_std, imag_grad_std = self.do_test('raw') + for test_type in self.test_types: + out, real_grad, imag_grad = self.do_test(test_type) + np.testing.assert_allclose(out.numpy(), out_std.numpy(), rtol=1e-20) + np.testing.assert_allclose( + real_grad.numpy(), real_grad_std.numpy(), rtol=1e-20 + ) + np.testing.assert_allclose( + imag_grad.numpy(), imag_grad_std.numpy(), rtol=1e-20 + ) + + if __name__ == "__main__": unittest.main() diff --git a/test/legacy_test/test_cos.py b/test/legacy_test/test_cos.py new file mode 100644 index 00000000000000..ab63edfe3ce295 --- /dev/null +++ b/test/legacy_test/test_cos.py @@ -0,0 +1,64 @@ +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import numpy as np + +import paddle + + +class TestCosOutAndParamDecorator(unittest.TestCase): + def setUp(self): + paddle.disable_static() + self.x_np = np.random.rand(3, 4).astype(np.float32) + self.test_types = ["decorator", "out", "out_decorator"] + + def do_test(self, test_type): + x = paddle.to_tensor(self.x_np, stop_gradient=False) + if test_type == 'raw': + result = paddle.cos(x) + result.mean().backward() + return result, x.grad + elif test_type == 'decorator': + result = paddle.cos(input=x) + result.mean().backward() + return result, x.grad + elif test_type == 'out': + out = paddle.empty_like(x) + out.stop_gradient = False + paddle.cos(x, out=out) + out.mean().backward() + return out, x.grad + elif test_type == 'out_decorator': + out = paddle.empty_like(x) + out.stop_gradient = False + paddle.cos(input=x, out=out) + out.mean().backward() + return out, x.grad + else: + raise ValueError(f"Unknown test type: {test_type}") + + def test_all(self): + out_std, grad_std = self.do_test('raw') + for test_type in self.test_types: + out, grad = self.do_test(test_type) + np.testing.assert_allclose(out.numpy(), out_std.numpy(), rtol=1e-7) + np.testing.assert_allclose( + grad.numpy(), grad_std.numpy(), rtol=1e-7 + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/legacy_test/test_floor.py b/test/legacy_test/test_floor.py new file mode 100644 index 00000000000000..d230f45306cf90 --- /dev/null +++ b/test/legacy_test/test_floor.py @@ -0,0 +1,64 @@ +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import numpy as np + +import paddle + + +class TestFloorOutAndParamDecorator(unittest.TestCase): + def setUp(self): + paddle.disable_static() + self.x_np = np.random.uniform(-10, 10, [3, 4]).astype(np.float32) + self.test_types = ["decorator", "out", "out_decorator"] + + def do_test(self, test_type): + x = paddle.to_tensor(self.x_np, stop_gradient=False) + if test_type == 'raw': + result = paddle.floor(x) + result.mean().backward() + return result, x.grad + elif test_type == 'decorator': + result = paddle.floor(input=x) + result.mean().backward() + return result, x.grad + elif test_type == 'out': + out = paddle.empty_like(x) + out.stop_gradient = False + paddle.floor(x, out=out) + out.mean().backward() + return out, x.grad + elif test_type == 'out_decorator': + out = paddle.empty_like(x) + out.stop_gradient = False + paddle.floor(input=x, out=out) + out.mean().backward() + return out, x.grad + else: + raise ValueError(f"Unknown test type: {test_type}") + + def test_all(self): + out_std, grad_std = self.do_test('raw') + for test_type in self.test_types: + out, grad = self.do_test(test_type) + np.testing.assert_allclose(out.numpy(), out_std.numpy(), rtol=1e-20) + np.testing.assert_allclose( + grad.numpy(), grad_std.numpy(), rtol=1e-20 + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/legacy_test/test_layer_norm_op.py b/test/legacy_test/test_layer_norm_op.py index fee3b2ca21f0bb..791d2aa7595841 100644 --- a/test/legacy_test/test_layer_norm_op.py +++ b/test/legacy_test/test_layer_norm_op.py @@ -174,7 +174,7 @@ def initConfig(self): self.begin_norm_axis = 1 self.has_scale = True self.has_bias = True - self.check_prim = True + self.check_prim = False self.check_prim_pir = True self.check_pir = True @@ -272,7 +272,7 @@ def initConfig(self): self.begin_norm_axis = 1 self.has_scale = True self.has_bias = True - self.check_prim = True + self.check_prim = False self.check_prim_pir = True self.check_pir = True @@ -494,7 +494,7 @@ def initConfig(self): self.begin_norm_axis = 1 self.has_scale = True self.has_bias = True - self.check_prim = True + self.check_prim = False self.check_prim_pir = True self.check_pir = True @@ -514,7 +514,7 @@ def initConfig(self): self.begin_norm_axis = 1 self.has_scale = True self.has_bias = True - self.check_prim = True + self.check_prim = False self.check_prim_pir = True self.check_pir = True diff --git a/test/legacy_test/test_log.py b/test/legacy_test/test_log.py new file mode 100644 index 00000000000000..e73a68e99ae859 --- /dev/null +++ b/test/legacy_test/test_log.py @@ -0,0 +1,64 @@ +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import numpy as np + +import paddle + + +class TestLogOutAndParamDecorator(unittest.TestCase): + def setUp(self): + paddle.disable_static() + self.x_np = np.random.uniform(0.1, 1, [3, 4]).astype(np.float32) + self.test_types = ["decorator", "out", "out_decorator"] + + def do_test(self, test_type): + x = paddle.to_tensor(self.x_np, stop_gradient=False) + if test_type == 'raw': + result = paddle.log(x) + result.mean().backward() + return result, x.grad + elif test_type == 'decorator': + result = paddle.log(input=x) + result.mean().backward() + return result, x.grad + elif test_type == 'out': + out = paddle.empty_like(x) + out.stop_gradient = False + paddle.log(x, out=out) + out.mean().backward() + return out, x.grad + elif test_type == 'out_decorator': + out = paddle.empty_like(x) + out.stop_gradient = False + paddle.log(input=x, out=out) + out.mean().backward() + return out, x.grad + else: + raise ValueError(f"Unknown test type: {test_type}") + + def test_all(self): + out_std, grad_std = self.do_test('raw') + for test_type in self.test_types: + out, grad = self.do_test(test_type) + np.testing.assert_allclose(out.numpy(), out_std.numpy(), rtol=1e-20) + np.testing.assert_allclose( + grad.numpy(), grad_std.numpy(), rtol=1e-20 + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/legacy_test/test_multiply.py b/test/legacy_test/test_multiply.py index 8f8f07680da961..e302843a177bb3 100755 --- a/test/legacy_test/test_multiply.py +++ b/test/legacy_test/test_multiply.py @@ -303,5 +303,65 @@ def test_multiply(self): assert y.grad.dtype == paddle.bfloat16 +class TestMultiplyOutAndParamDecorator(unittest.TestCase): + def setUp(self): + paddle.disable_static() + self.x_np = np.random.rand(3, 4).astype(np.float32) + self.y_np = np.random.rand(3, 4).astype(np.float32) + self.test_types = [ + # "decorator_input", + # "decorator_other", + # "decorator_both", + "out", + # "out_decorator", + ] + + def do_test(self, test_type): + x = paddle.to_tensor(self.x_np, stop_gradient=False) + y = paddle.to_tensor(self.y_np, stop_gradient=False) + if test_type == 'raw': + result = paddle.multiply(x, y) + result.mean().backward() + return result, x.grad, y.grad + elif test_type == 'decorator_input': + result = paddle.multiply(input=x, y=y) + result.mean().backward() + return result, x.grad, y.grad + elif test_type == 'decorator_other': + result = paddle.multiply(x, other=y) + result.mean().backward() + return result, x.grad, y.grad + elif test_type == 'decorator_both': + result = paddle.multiply(input=x, other=y) + result.mean().backward() + return result, x.grad, y.grad + elif test_type == 'out': + out = paddle.empty_like(x) + out.stop_gradient = False + paddle.multiply(x, y, out=out) + out.mean().backward() + return out, x.grad, y.grad + elif test_type == 'out_decorator': + out = paddle.empty_like(x) + out.stop_gradient = False + paddle.multiply(input=x, other=y, out=out) + out.mean().backward() + return out, x.grad, y.grad + else: + raise ValueError(f"Unknown test type: {test_type}") + + def test_all(self): + out_std, x_grad_std, y_grad_std = self.do_test('raw') + for test_type in self.test_types: + out, x_grad, y_grad = self.do_test(test_type) + np.testing.assert_allclose(out.numpy(), out_std.numpy(), rtol=1e-20) + np.testing.assert_allclose( + x_grad.numpy(), x_grad_std.numpy(), rtol=1e-20 + ) + np.testing.assert_allclose( + y_grad.numpy(), y_grad_std.numpy(), rtol=1e-20 + ) + + if __name__ == '__main__': unittest.main() diff --git a/test/legacy_test/test_polar.py 
b/test/legacy_test/test_polar.py index 5c8afcdd67fe3c..f365ad2efdc7cb 100644 --- a/test/legacy_test/test_polar.py +++ b/test/legacy_test/test_polar.py @@ -131,5 +131,43 @@ def init_input(self): self.angle = np.random.random([0, 1]) +class TestPolarOut(unittest.TestCase): + def setUp(self): + paddle.disable_static() + self.shape = [3, 4] + self.abs_np = np.random.rand(*self.shape).astype(np.float32) + self.angle_np = np.random.rand(*self.shape).astype(np.float32) + self.test_types = ["out"] + + def do_test(self, test_type): + abs_t = paddle.to_tensor(self.abs_np, stop_gradient=False) + angle_t = paddle.to_tensor(self.angle_np, stop_gradient=False) + + if test_type == 'raw': + result = paddle.polar(abs_t, angle_t) + result.real().mean().backward() + return result, abs_t.grad, angle_t.grad + elif test_type == 'out': + out = paddle.empty(self.shape, dtype='complex64') + out.stop_gradient = False + paddle.polar(abs_t, angle_t, out=out) + out.real().mean().backward() + return out, abs_t.grad, angle_t.grad + else: + raise ValueError(f"Unknown test type: {test_type}") + + def test_out(self): + out_std, abs_grad_std, angle_grad_std = self.do_test('raw') + for test_type in self.test_types: + out, abs_grad, angle_grad = self.do_test(test_type) + np.testing.assert_allclose(out.numpy(), out_std.numpy(), rtol=1e-6) + np.testing.assert_allclose( + abs_grad.numpy(), abs_grad_std.numpy(), rtol=1e-6 + ) + np.testing.assert_allclose( + angle_grad.numpy(), angle_grad_std.numpy(), rtol=1e-6 + ) + + if __name__ == "__main__": unittest.main() diff --git a/test/legacy_test/test_pow.py b/test/legacy_test/test_pow.py index 7daa042255f576..dba6cdd0da1b0e 100755 --- a/test/legacy_test/test_pow.py +++ b/test/legacy_test/test_pow.py @@ -304,5 +304,65 @@ def test_xpowy(self): ) +class TestPowOutAndParamDecorator(unittest.TestCase): + def setUp(self): + paddle.disable_static() + self.x_np = np.random.uniform(0.1, 1, [3, 4]).astype(np.float32) + self.y_np = np.random.uniform(1, 3, [3, 4]).astype(np.float32) + self.test_types = [ + "decorator_input", + "decorator_exponent", + "decorator_both", + "out", + "out_decorator", + ] + + def do_test(self, test_type): + x = paddle.to_tensor(self.x_np, stop_gradient=False) + y = paddle.to_tensor(self.y_np, stop_gradient=False) + if test_type == 'raw': + result = paddle.pow(x, y) + result.mean().backward() + return result, x.grad, y.grad + elif test_type == 'decorator_input': + result = paddle.pow(input=x, y=y) + result.mean().backward() + return result, x.grad, y.grad + elif test_type == 'decorator_exponent': + result = paddle.pow(x, exponent=y) + result.mean().backward() + return result, x.grad, y.grad + elif test_type == 'decorator_both': + result = paddle.pow(input=x, exponent=y) + result.mean().backward() + return result, x.grad, y.grad + elif test_type == 'out': + out = paddle.empty_like(x) + out.stop_gradient = False + paddle.pow(x, y, out=out) + out.mean().backward() + return out, x.grad, y.grad + elif test_type == 'out_decorator': + out = paddle.empty_like(x) + out.stop_gradient = False + paddle.pow(input=x, exponent=y, out=out) + out.mean().backward() + return out, x.grad, y.grad + else: + raise ValueError(f"Unknown test type: {test_type}") + + def test_all(self): + out_std, x_grad_std, y_grad_std = self.do_test('raw') + for test_type in self.test_types: + out, x_grad, y_grad = self.do_test(test_type) + np.testing.assert_allclose(out.numpy(), out_std.numpy(), rtol=1e-6) + np.testing.assert_allclose( + x_grad.numpy(), x_grad_std.numpy(), rtol=1e-6 + ) + 
np.testing.assert_allclose( + y_grad.numpy(), y_grad_std.numpy(), rtol=1e-6 + ) + + if __name__ == '__main__': unittest.main() diff --git a/test/legacy_test/test_rsqrt.py b/test/legacy_test/test_rsqrt.py new file mode 100644 index 00000000000000..a3a9e02771e518 --- /dev/null +++ b/test/legacy_test/test_rsqrt.py @@ -0,0 +1,64 @@ +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np + +import paddle + + +class TestRsqrtOutAndParamDecorator(unittest.TestCase): + def setUp(self): + paddle.disable_static() + self.x_np = np.random.uniform(0.1, 1, [3, 4]).astype(np.float32) + self.test_types = ["decorator", "out", "out_decorator"] + + def do_test(self, test_type): + x = paddle.to_tensor(self.x_np, stop_gradient=False) + if test_type == 'raw': + result = paddle.rsqrt(x) + result.mean().backward() + return result, x.grad + elif test_type == 'decorator': + result = paddle.rsqrt(input=x) + result.mean().backward() + return result, x.grad + elif test_type == 'out': + out = paddle.empty_like(x) + out.stop_gradient = False + paddle.rsqrt(x, out=out) + out.mean().backward() + return out, x.grad + elif test_type == 'out_decorator': + out = paddle.empty_like(x) + out.stop_gradient = False + paddle.rsqrt(input=x, out=out) + out.mean().backward() + return out, x.grad + else: + raise ValueError(f"Unknown test type: {test_type}") + + def test_all(self): + out_std, grad_std = self.do_test('raw') + for test_type in self.test_types: + out, grad = self.do_test(test_type) + np.testing.assert_allclose(out.numpy(), out_std.numpy(), rtol=1e-7) + np.testing.assert_allclose( + grad.numpy(), grad_std.numpy(), rtol=1e-7 + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/legacy_test/test_sign_op.py b/test/legacy_test/test_sign_op.py index be6ef62b1c0da0..f2de83fb0e9020 100644 --- a/test/legacy_test/test_sign_op.py +++ b/test/legacy_test/test_sign_op.py @@ -304,6 +304,48 @@ def test_grad(self): self.func(p) +class TestSignOutAndParamDecorator(unittest.TestCase): + def setUp(self): + paddle.disable_static() + self.x_np = np.random.randn(3, 4).astype(np.float32) + self.x_np[self.x_np == 0] = 1 # Avoid zero for gradient check + self.test_types = ["decorator", "out", "out_decorator"] + + def do_test(self, test_type): + x = paddle.to_tensor(self.x_np, stop_gradient=False) + if test_type == 'raw': + result = paddle.sign(x) + result.mean().backward() + return result, x.grad + elif test_type == 'decorator': + result = paddle.sign(input=x) + result.mean().backward() + return result, x.grad + elif test_type == 'out': + out = paddle.empty_like(x) + out.stop_gradient = False + paddle.sign(x, out=out) + out.mean().backward() + return out, x.grad + elif test_type == 'out_decorator': + out = paddle.empty_like(x) + out.stop_gradient = False + paddle.sign(input=x, out=out) + out.mean().backward() + return out, x.grad + else: + raise ValueError(f"Unknown test type: {test_type}") + + def test_all(self): + out_std, grad_std = 
self.do_test('raw') + for test_type in self.test_types: + out, grad = self.do_test(test_type) + np.testing.assert_allclose(out.numpy(), out_std.numpy(), rtol=1e-20) + np.testing.assert_allclose( + grad.numpy(), grad_std.numpy(), rtol=1e-20 + ) + + if __name__ == "__main__": paddle.enable_static() unittest.main() diff --git a/test/legacy_test/test_sin.py b/test/legacy_test/test_sin.py new file mode 100644 index 00000000000000..a3c52c2b39401f --- /dev/null +++ b/test/legacy_test/test_sin.py @@ -0,0 +1,64 @@ +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np + +import paddle + + +class TestSinOutAndParamDecorator(unittest.TestCase): + def setUp(self): + paddle.disable_static() + self.x_np = np.random.rand(3, 4).astype(np.float32) + self.test_types = ["decorator", "out", "out_decorator"] + + def do_test(self, test_type): + x = paddle.to_tensor(self.x_np, stop_gradient=False) + if test_type == 'raw': + result = paddle.sin(x) + result.mean().backward() + return result, x.grad + elif test_type == 'decorator': + result = paddle.sin(input=x) + result.mean().backward() + return result, x.grad + elif test_type == 'out': + out = paddle.empty_like(x) + out.stop_gradient = False + paddle.sin(x, out=out) + out.mean().backward() + return out, x.grad + elif test_type == 'out_decorator': + out = paddle.empty_like(x) + out.stop_gradient = False + paddle.sin(input=x, out=out) + out.mean().backward() + return out, x.grad + else: + raise ValueError(f"Unknown test type: {test_type}") + + def test_all(self): + out_std, grad_std = self.do_test('raw') + for test_type in self.test_types: + out, grad = self.do_test(test_type) + np.testing.assert_allclose(out.numpy(), out_std.numpy(), rtol=1e-7) + np.testing.assert_allclose( + grad.numpy(), grad_std.numpy(), rtol=1e-7 + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/legacy_test/test_stack_op.py b/test/legacy_test/test_stack_op.py index ce935fea850903..0b9a5cfb84344c 100644 --- a/test/legacy_test/test_stack_op.py +++ b/test/legacy_test/test_stack_op.py @@ -541,5 +541,70 @@ def test_static_gpu(self): np.testing.assert_equal(expected_result, result) +class TestStackOutAndParamDecorator(unittest.TestCase): + def setUp(self): + paddle.disable_static() + self.inputs_np = [ + np.random.rand(2, 3).astype(np.float32) for _ in range(3) + ] + self.test_types = [ + "decorator_tensors", + "decorator_dim", + "decorator_both", + "out", + "out_decorator", + ] + + def do_test(self, test_type): + inputs = [ + paddle.to_tensor(x, stop_gradient=False) for x in self.inputs_np + ] + + if test_type == 'raw': + result = paddle.stack(inputs, axis=1) + result.mean().backward() + grads = [x.grad for x in inputs] + return result, grads + elif test_type == 'decorator_tensors': + result = paddle.stack(tensors=inputs, axis=1) + result.mean().backward() + grads = [x.grad for x in inputs] + return result, grads + elif test_type == 'decorator_dim': + result = paddle.stack(inputs, 
dim=1) + result.mean().backward() + grads = [x.grad for x in inputs] + return result, grads + elif test_type == 'decorator_both': + result = paddle.stack(tensors=inputs, dim=1) + result.mean().backward() + grads = [x.grad for x in inputs] + return result, grads + elif test_type == 'out': + out = paddle.empty((2, 3, 3), dtype='float32') + out.stop_gradient = False + paddle.stack(inputs, axis=1, out=out) + out.mean().backward() + grads = [x.grad for x in inputs] + return out, grads + elif test_type == 'out_decorator': + out = paddle.empty((2, 3, 3), dtype='float32') + out.stop_gradient = False + paddle.stack(tensors=inputs, dim=1, out=out) + out.mean().backward() + grads = [x.grad for x in inputs] + return out, grads + else: + raise ValueError(f"Unknown test type: {test_type}") + + def test_all(self): + out_std, grads_std = self.do_test('raw') + for test_type in self.test_types: + out, grads = self.do_test(test_type) + np.testing.assert_allclose(out.numpy(), out_std.numpy(), rtol=1e-20) + for g, g_std in zip(grads, grads_std): + np.testing.assert_allclose(g.numpy(), g_std.numpy(), rtol=1e-20) + + if __name__ == '__main__': unittest.main() diff --git a/test/prim/pir_prim/test_builtin_slice.py b/test/prim/pir_prim/test_builtin_slice.py index a6b34c306624ee..94e96e84cd2681 100644 --- a/test/prim/pir_prim/test_builtin_slice.py +++ b/test/prim/pir_prim/test_builtin_slice.py @@ -17,7 +17,6 @@ import numpy as np import paddle -from paddle import pir from paddle.decomposition import decompose from paddle.framework import core @@ -42,22 +41,20 @@ def setUp(self): def get_ir_program(self): paddle.enable_static() - with paddle.pir_utils.OldIrGuard(): - main_program, start_program = ( - paddle.static.Program(), - paddle.static.Program(), - ) - with paddle.static.program_guard(main_program, start_program): - x1 = paddle.static.data('x1', self.c_shape, self.dtype) - x2 = paddle.static.data('x2', self.c_shape, self.dtype) - x3 = paddle.static.data('x3', self.c_shape, self.dtype) - x4 = paddle.static.data('x4', self.c_shape, self.dtype) - y = meshgrid_net(x1, x2, x3, x4) - res1 = paddle.tanh(y[0]) - res2 = paddle.sin(y[1]) - res3 = paddle.cos(y[2]) - pir_program = pir.translate_to_pir(main_program.desc) - return pir_program + main_program, start_program = ( + paddle.static.Program(), + paddle.static.Program(), + ) + with paddle.static.program_guard(main_program, start_program): + x1 = paddle.static.data('x1', self.c_shape, self.dtype) + x2 = paddle.static.data('x2', self.c_shape, self.dtype) + x3 = paddle.static.data('x3', self.c_shape, self.dtype) + x4 = paddle.static.data('x4', self.c_shape, self.dtype) + y = meshgrid_net(x1, x2, x3, x4) + res1 = paddle.tanh(y[0]) + res2 = paddle.sin(y[1]) + res3 = paddle.cos(y[2]) + return main_program def test_build_op(self): pir_program = self.get_ir_program() diff --git a/test/prim/pir_prim/test_decompose_op.py b/test/prim/pir_prim/test_decompose_op.py index e5df36821e4bab..2f93b0bf248a67 100644 --- a/test/prim/pir_prim/test_decompose_op.py +++ b/test/prim/pir_prim/test_decompose_op.py @@ -15,12 +15,8 @@ import unittest -import numpy as np - import paddle from paddle import pir -from paddle.base import core -from paddle.decomposition import decomp paddle.enable_static() @@ -67,74 +63,5 @@ def get_pir_program_and_param_map(): return pir_program, param_mapping -class TestDecomposeOp(unittest.TestCase): - def setUp(self): - np.random.seed(2023) - self.shape_x = [3, 3] - self.x = np.random.random(self.shape_x).astype("float32") - self.shape_y = [3, 3] - self.y = 
np.random.random(self.shape_y).astype("float32") - self.shape_z = [3, 3] - self.z = np.random.random(self.shape_z).astype("float32") - - def net(self, flag=None): - ( - pir_program, - param_mapping, - ) = get_pir_program_and_param_map() - - pir_ops = pir_program.global_block().ops - fetch_list = [pir_ops[12].result(0)] - - if flag == "decompose": - core._set_prim_forward_enabled(True) - core._set_prim_backward_enabled(True) - - # get the grad_var_to_var - grad_var_to_var = { - 'concat_0.tmp_0@GRAD': 'concat_0.tmp_0', - 'dropout_0.tmp_0@GRAD': 'dropout_0.tmp_0', - 'elementwise_add_0@GRAD': 'elementwise_add_0', - 'elementwise_add_1@GRAD': 'elementwise_add_1', - 'elementwise_mul_0@GRAD': 'elementwise_mul_0', - 'layer_norm_0.tmp_2@GRAD': 'layer_norm_0.tmp_2', - 'matmul_v2_0.tmp_0@GRAD': 'matmul_v2_0.tmp_0', - 'mean_0.tmp_0@GRAD': 'mean_0.tmp_0', - 'mean_1.tmp_0@GRAD': 'mean_1.tmp_0', - 'rsqrt_0.tmp_0@GRAD': 'rsqrt_0.tmp_0', - 'x@GRAD': 'x', - 'x@GRAD@RENAME@block0@0': 'x', - 'x@GRAD@RENAME@block0@1': 'x', - 'y@GRAD': 'y', - 'z@GRAD': 'z', - 'z@GRAD@RENAME@block0@0': 'z', - 'z@GRAD@RENAME@block0@1': 'z', - } - decomp.decompose_pir_program( - pir_program, param_mapping, grad_var_to_var - ) - - with ( - paddle.pir_utils.IrGuard(), - paddle.pir.core.program_guard(pir_program), - ): - exe = paddle.static.Executor() - outs = exe.run( - pir_program, - feed={'x': self.x, 'y': self.y, 'z': self.z}, - fetch_list=fetch_list, - ) - core._set_prim_backward_enabled(False) - core._set_prim_forward_enabled(False) - - return outs - - def test_decompose_op(self): - res_ref = self.net() - res = self.net("decompose") - for ref, actual in zip(res_ref, res): - np.testing.assert_allclose(ref, actual, atol=1e-4) - - if __name__ == "__main__": unittest.main()