From e8a15c8bdf439a296b7143c38f13859c285e6656 Mon Sep 17 00:00:00 2001
From: Hu Shenwei
Date: Tue, 19 Aug 2025 10:39:46 +0800
Subject: [PATCH 1/2] fix(math.py): fix output type diff for clip kernel

---
 python/paddle/tensor/math.py     |  21 ++++-
 test/legacy_test/test_clip_op.py | 147 +++++++++++++++++++++++++++++++
 2 files changed, 166 insertions(+), 2 deletions(-)

diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py
index 0d72b3d1eace77..5ba27590337bf7 100644
--- a/python/paddle/tensor/math.py
+++ b/python/paddle/tensor/math.py
@@ -3706,7 +3706,8 @@ def clip(
         name (str|None, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
 
     Returns:
-        Tensor: A Tensor with the same data type and data shape as input.
+        Tensor: A Tensor with the same data shape as input. If either min or max is a floating-point value/Tensor, the output tensor will have a data type of ``float32``. Otherwise, the output tensor will inherit the same data type as the input.
+
     Examples:
         .. code-block:: python
 
@@ -3727,6 +3728,20 @@ def clip(
     """
     x_dtype = str(x.dtype)
+    is_cast_x_to_fp32 = False
+    if x_dtype in ['paddle.int32', 'paddle.int64'] and (
+        isinstance(min, float)
+        or isinstance(max, float)
+        or (
+            isinstance(min, Variable)
+            and isinstance(min.item(0), float)
+            or (isinstance(max, Variable) and isinstance(max.item(0), float))
+        )
+    ):
+        is_cast_x_to_fp32 = True
+        x = paddle.cast(x, paddle.float32)
+        x_dtype = 'paddle.float32'
+
     if x_dtype == 'paddle.int32':
         min_ = np.iinfo(np.int32).min
         max_ = np.iinfo(np.int32).max - 2**7
     elif x_dtype == 'paddle.int64':
@@ -3797,7 +3812,9 @@ def clip(
 
     helper = LayerHelper('clip', **locals())
     output = helper.create_variable_for_type_inference(
-        dtype=helper.input_dtype('x')
+        dtype=(
+            helper.input_dtype('x') if not is_cast_x_to_fp32 else "float32"
+        )
     )
     helper.append_op(
         type='clip', inputs=inputs, outputs={'Out': [output]}, attrs=attrs
diff --git a/test/legacy_test/test_clip_op.py b/test/legacy_test/test_clip_op.py
index 0771ff51e61e5e..d9324d959b46ec 100644
--- a/test/legacy_test/test_clip_op.py
+++ b/test/legacy_test/test_clip_op.py
@@ -487,6 +487,153 @@ def test_errors(self):
         paddle.disable_static()
 
 
+class TestClipAPI_Int(unittest.TestCase):
+    def _executed_api(self, x, min=None, max=None):
+        return paddle.clip(x, min, max)
+
+    def test_clip(self):
+        paddle.enable_static()
+        data_shape = [1, 9, 9, 4]
+        data = np.random.random(data_shape).astype('int32')
+        place = (
+            base.CUDAPlace(0)
+            if base.core.is_compiled_with_cuda()
+            else base.CPUPlace()
+        )
+        exe = base.Executor(place)
+
+        main = paddle.static.Program()
+        startup = paddle.static.Program()
+        with paddle.static.program_guard(main, startup):
+            images = paddle.static.data(
+                name='image', shape=data_shape, dtype='int32'
+            )
+            min = paddle.static.data(name='min', shape=[1], dtype='float32')
+            max = paddle.static.data(name='max', shape=[1], dtype='float32')
+            out_1 = self._executed_api(images, min=min, max=max)
+            out_2 = self._executed_api(images, min=2.2, max=8.9)
+            out_3 = self._executed_api(images, min=3.3)
+            out_4 = self._executed_api(images, max=4.7)
+            out_5 = self._executed_api(images, min=min)
+            out_6 = self._executed_api(images, max=max)
+            out_7 = self._executed_api(images, max=-1.0)
+            out_8 = self._executed_api(images)
+            out_9 = self._executed_api(
+                paddle.cast(images, 'int32'), min=2.2, max=8.9
+            )
+            out_10 = self._executed_api(
+                paddle.cast(images * 10, 'int32'), min=2.8, max=8.8
+            )
+            out_11 = self._executed_api(
+                paddle.cast(images * 10, 'int64'), min=2.8, max=8.8
+            )
+
+        (
+            res1,
+            res2,
+            res3,
+            res4,
+            res5,
+            res6,
+            res7,
+            res8,
+            res9,
+            res10,
+            res11,
+        ) = exe.run(
+            main,
+            feed={
+                "image": data,
+                "min": np.array([2.2]).astype('float32'),
+                "max": np.array([8.8]).astype('float32'),
+            },
+            fetch_list=[
+                out_1,
+                out_2,
+                out_3,
+                out_4,
+                out_5,
+                out_6,
+                out_7,
+                out_8,
+                out_9,
+                out_10,
+                out_11,
+            ],
+        )
+
+        np.testing.assert_allclose(res1, data.clip(2.2, 8.8), rtol=1e-05)
+        np.testing.assert_allclose(res2, data.clip(2.2, 8.9), rtol=1e-05)
+        np.testing.assert_allclose(res3, data.clip(min=3.3), rtol=1e-05)
+        np.testing.assert_allclose(res4, data.clip(max=4.7), rtol=1e-05)
+        np.testing.assert_allclose(res5, data.clip(min=2.2), rtol=1e-05)
+        np.testing.assert_allclose(res6, data.clip(max=8.8), rtol=1e-05)
+        np.testing.assert_allclose(res7, data.clip(max=-1.0), rtol=1e-05)
+        np.testing.assert_allclose(res8, data, rtol=1e-05)
+        np.testing.assert_allclose(
+            res9, data.astype(np.int32).clip(2.2, 8.9), rtol=1e-05
+        )
+        np.testing.assert_allclose(
+            res10, (data * 10).astype(np.int32).clip(2.8, 8.8), rtol=1e-05
+        )
+        np.testing.assert_allclose(
+            res11, (data * 10).astype(np.int64).clip(2.8, 8.8), rtol=1e-05
+        )
+        paddle.disable_static()
+
+    def test_clip_dygraph(self):
+        paddle.disable_static()
+        place = (
+            base.CUDAPlace(0)
+            if base.core.is_compiled_with_cuda()
+            else base.CPUPlace()
+        )
+        paddle.disable_static(place)
+        data_shape = [1, 9, 9, 4]
+        data = np.random.random(data_shape).astype('int32')
+        images = paddle.to_tensor(data, dtype='int32')
+        v_min = paddle.to_tensor(np.array([2.2], dtype=np.float32))
+        v_max = paddle.to_tensor(np.array([8.8], dtype=np.float32))
+
+        out_1 = self._executed_api(images, min=2.2, max=8.8)
+        images = paddle.to_tensor(data, dtype='int32')
+        out_2 = self._executed_api(images, min=2.2, max=8.9)
+        images = paddle.to_tensor(data, dtype='int32')
+        out_3 = self._executed_api(images, min=v_min, max=v_max)
+
+        out_4 = self._executed_api(
+            paddle.cast(images * 10, 'int32'), min=2.2, max=8.8
+        )
+        out_5 = self._executed_api(
+            paddle.cast(images * 10, 'int64'), min=2.2, max=8.8
+        )
+        # test with numpy.generic
+        out_6 = self._executed_api(images, min=np.abs(2.2), max=np.abs(8.8))
+
+        np.testing.assert_allclose(
+            out_1.numpy(), data.clip(2.2, 8.8), rtol=1e-05
+        )
+        np.testing.assert_allclose(
+            out_2.numpy(), data.clip(2.2, 8.9), rtol=1e-05
+        )
+        np.testing.assert_allclose(
+            out_3.numpy(), data.clip(2.2, 8.8), rtol=1e-05
+        )
+        np.testing.assert_allclose(
+            out_4.numpy(),
+            (data * 10).astype(np.int32).clip(2.2, 8.8),
+            rtol=1e-05,
+        )
+        np.testing.assert_allclose(
+            out_5.numpy(),
+            (data * 10).astype(np.int64).clip(2.2, 8.8),
+            rtol=1e-05,
+        )
+        np.testing.assert_allclose(
+            out_6.numpy(), data.clip(2.2, 8.8), rtol=1e-05
+        )
+
+
 class TestClipOpFp16(unittest.TestCase):
     def test_fp16(self):
         if base.core.is_compiled_with_cuda():

From b714fd8f966afc65a9ab543301f4cbeb482e6377 Mon Sep 17 00:00:00 2001
From: Hu Shenwei
Date: Tue, 19 Aug 2025 16:11:20 +0800
Subject: [PATCH 2/2] fix(math.py): fix output type diff for clip kernel

---
 python/paddle/tensor/math.py | 51 +++++++++++++++++++-----------------
 1 file changed, 27 insertions(+), 24 deletions(-)

diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py
index 5ba27590337bf7..43c2df548218a9 100644
--- a/python/paddle/tensor/math.py
+++ b/python/paddle/tensor/math.py
@@ -3728,20 +3728,6 @@ def clip(
     """
     x_dtype = str(x.dtype)
-    is_cast_x_to_fp32 = False
-    if x_dtype in ['paddle.int32', 'paddle.int64'] and (
-        isinstance(min, float)
-        or isinstance(max, float)
-        or (
-            isinstance(min, Variable)
-            and isinstance(min.item(0), float)
-            or (isinstance(max, Variable) and isinstance(max.item(0), float))
-        )
-    ):
-        is_cast_x_to_fp32 = True
-        x = paddle.cast(x, paddle.float32)
-        x_dtype = 'paddle.float32'
-
     if x_dtype == 'paddle.int32':
         min_ = np.iinfo(np.int32).min
         max_ = np.iinfo(np.int32).max - 2**7
     elif x_dtype == 'paddle.int64':
@@ -3757,14 +3743,33 @@ def clip(
     else:
         min_ = float(np.finfo(np.float32).min)
         max_ = float(np.finfo(np.float32).max)
+    min = min_ if min is None else min
+    max = max_ if max is None else max
 
-    if in_dynamic_or_pir_mode():
-        if isinstance(min, Variable):
-            min = min.item(0)
-        if isinstance(max, Variable):
-            max = max.item(0)
-        min = min_ if min is None else min
-        max = max_ if max is None else max
+    if in_dynamic_mode():
+        if x_dtype in ['paddle.int32', 'paddle.int64']:
+            if isinstance(min, paddle.Tensor):
+                min = min.item(0)
+            if isinstance(max, paddle.Tensor):
+                max = max.item(0)
+            if isinstance(min, float) or isinstance(max, float):
+                x = paddle.cast(x, paddle.float32)
+        return _C_ops.clip(x, min, max)
+    elif in_pir_mode():
+        if x_dtype in ['paddle.int32', 'paddle.int64']:
+            if (
+                isinstance(min, float)
+                or isinstance(max, float)
+                or (
+                    isinstance(min, paddle.pir.Value)
+                    and min.dtype in [paddle.float32, paddle.float64]
+                )
+                or (
+                    isinstance(max, paddle.pir.Value)
+                    and max.dtype in [paddle.float32, paddle.float64]
+                )
+            ):
+                x = paddle.cast(x, paddle.float32)
         return _C_ops.clip(x, min, max)
     else:
         if min is not None:
@@ -3812,9 +3817,7 @@ def clip(
 
     helper = LayerHelper('clip', **locals())
     output = helper.create_variable_for_type_inference(
-        dtype=(
-            helper.input_dtype('x') if not is_cast_x_to_fp32 else "float32"
-        )
+        dtype=helper.input_dtype('x')
    )
     helper.append_op(
         type='clip', inputs=inputs, outputs={'Out': [output]}, attrs=attrs
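
Reviewer note (not part of the patch series): a minimal sketch of the behavior
change, assuming a Paddle build with these patches applied. NumPy promotes an
integer array clipped with float bounds to a floating dtype; with this fix,
paddle.clip mirrors that by returning float32 whenever either bound is a
float, while integer bounds still preserve the input dtype:

    import numpy as np
    import paddle

    x_np = np.arange(6, dtype=np.int32)
    # NumPy: float bounds promote the result to a floating dtype.
    print(x_np.clip(1.5, 4.5).dtype)        # float64

    x = paddle.to_tensor(x_np)              # paddle.int32 tensor
    # With the series applied, float bounds yield a float32 output.
    out = paddle.clip(x, min=1.5, max=4.5)
    print(out.dtype)                        # paddle.float32 (previously paddle.int32)

    # Integer bounds keep the input dtype unchanged.
    out_int = paddle.clip(x, min=1, max=4)
    print(out_int.dtype)                    # paddle.int32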