From e8a15c8bdf439a296b7143c38f13859c285e6656 Mon Sep 17 00:00:00 2001
From: Hu Shenwei
Date: Tue, 19 Aug 2025 10:39:46 +0800
Subject: [PATCH 1/2] fix(math.py): fix output type diff for clip kernel

---
 python/paddle/tensor/math.py     |  21 ++++-
 test/legacy_test/test_clip_op.py | 147 +++++++++++++++++++++++++++++++
 2 files changed, 166 insertions(+), 2 deletions(-)

diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py
index 0d72b3d1eace77..5ba27590337bf7 100644
--- a/python/paddle/tensor/math.py
+++ b/python/paddle/tensor/math.py
@@ -3706,7 +3706,8 @@ def clip(
         name (str|None, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
 
     Returns:
-        Tensor: A Tensor with the same data type and data shape as input.
+        Tensor: A Tensor with the same data shape as input. If either min or max is a floating-point value/Tensor, the output tensor will have a data type of ``float32``. Otherwise, the output tensor will inherit the same data type as the input.
+
     Examples:
         .. code-block:: python
 
@@ -3727,6 +3728,20 @@ def clip(
     """
     x_dtype = str(x.dtype)
+    is_cast_x_to_fp32 = False
+    if x_dtype in ['paddle.int32', 'paddle.int64'] and (
+        isinstance(min, float)
+        or isinstance(max, float)
+        or (
+            isinstance(min, Variable)
+            and isinstance(min.item(0), float)
+            or (isinstance(max, Variable) and isinstance(max.item(0), float))
+        )
+    ):
+        is_cast_x_to_fp32 = True
+        x = paddle.cast(x, paddle.float32)
+        x_dtype = 'paddle.float32'
+
     if x_dtype == 'paddle.int32':
         min_ = np.iinfo(np.int32).min
         max_ = np.iinfo(np.int32).max - 2**7
     elif x_dtype == 'paddle.int64':
@@ -3797,7 +3812,9 @@ def clip(
 
     helper = LayerHelper('clip', **locals())
     output = helper.create_variable_for_type_inference(
-        dtype=helper.input_dtype('x')
+        dtype=(
+            helper.input_dtype('x') if not is_cast_x_to_fp32 else "float32"
+        )
     )
     helper.append_op(
         type='clip', inputs=inputs, outputs={'Out': [output]}, attrs=attrs
diff --git a/test/legacy_test/test_clip_op.py b/test/legacy_test/test_clip_op.py
index 0771ff51e61e5e..d9324d959b46ec 100644
--- a/test/legacy_test/test_clip_op.py
+++ b/test/legacy_test/test_clip_op.py
@@ -487,6 +487,153 @@ def test_errors(self):
         paddle.disable_static()
 
 
+class TestClipAPI_Int(unittest.TestCase):
+    def _executed_api(self, x, min=None, max=None):
+        return paddle.clip(x, min, max)
+
+    def test_clip(self):
+        paddle.enable_static()
+        data_shape = [1, 9, 9, 4]
+        data = np.random.random(data_shape).astype('int32')
+        place = (
+            base.CUDAPlace(0)
+            if base.core.is_compiled_with_cuda()
+            else base.CPUPlace()
+        )
+        exe = base.Executor(place)
+
+        main = paddle.static.Program()
+        startup = paddle.static.Program()
+        with paddle.static.program_guard(main, startup):
+            images = paddle.static.data(
+                name='image', shape=data_shape, dtype='int32'
+            )
+            min = paddle.static.data(name='min', shape=[1], dtype='float32')
+            max = paddle.static.data(name='max', shape=[1], dtype='float32')
+            out_1 = self._executed_api(images, min=min, max=max)
+            out_2 = self._executed_api(images, min=2.2, max=8.9)
+            out_3 = self._executed_api(images, min=3.3)
+            out_4 = self._executed_api(images, max=4.7)
+            out_5 = self._executed_api(images, min=min)
+            out_6 = self._executed_api(images, max=max)
+            out_7 = self._executed_api(images, max=-1.0)
+            out_8 = self._executed_api(images)
+            out_9 = self._executed_api(
+                paddle.cast(images, 'int32'), min=2.2, max=8.9
+            )
+            out_10 = self._executed_api(
+                paddle.cast(images * 10, 'int32'), min=2.8, max=8.8
+            )
+            out_11 = self._executed_api(
+                paddle.cast(images * 10, 'int64'), min=2.8, max=8.8
+            )
+
+        (
+            res1,
+            res2,
+            res3,
+            res4,
+            res5,
+            res6,
+            res7,
+            res8,
+            res9,
+            res10,
+            res11,
+        ) = exe.run(
+            main,
+            feed={
+                "image": data,
+                "min": np.array([2.2]).astype('float32'),
+                "max": np.array([8.8]).astype('float32'),
+            },
+            fetch_list=[
+                out_1,
+                out_2,
+                out_3,
+                out_4,
+                out_5,
+                out_6,
+                out_7,
+                out_8,
+                out_9,
+                out_10,
+                out_11,
+            ],
+        )
+
+        np.testing.assert_allclose(res1, data.clip(2.2, 8.8), rtol=1e-05)
+        np.testing.assert_allclose(res2, data.clip(2.2, 8.9), rtol=1e-05)
+        np.testing.assert_allclose(res3, data.clip(min=3.3), rtol=1e-05)
+        np.testing.assert_allclose(res4, data.clip(max=4.7), rtol=1e-05)
+        np.testing.assert_allclose(res5, data.clip(min=2.2), rtol=1e-05)
+        np.testing.assert_allclose(res6, data.clip(max=8.8), rtol=1e-05)
+        np.testing.assert_allclose(res7, data.clip(max=-1.0), rtol=1e-05)
+        np.testing.assert_allclose(res8, data, rtol=1e-05)
+        np.testing.assert_allclose(
+            res9, data.astype(np.int32).clip(2.2, 8.9), rtol=1e-05
+        )
+        np.testing.assert_allclose(
+            res10, (data * 10).astype(np.int32).clip(2.8, 8.8), rtol=1e-05
+        )
+        np.testing.assert_allclose(
+            res11, (data * 10).astype(np.int64).clip(2.8, 8.8), rtol=1e-05
+        )
+        paddle.disable_static()
+
+    def test_clip_dygraph(self):
+        paddle.disable_static()
+        place = (
+            base.CUDAPlace(0)
+            if base.core.is_compiled_with_cuda()
+            else base.CPUPlace()
+        )
+        paddle.disable_static(place)
+        data_shape = [1, 9, 9, 4]
+        data = np.random.random(data_shape).astype('int32')
+        images = paddle.to_tensor(data, dtype='int32')
+        v_min = paddle.to_tensor(np.array([2.2], dtype=np.float32))
+        v_max = paddle.to_tensor(np.array([8.8], dtype=np.float32))
+
+        out_1 = self._executed_api(images, min=2.2, max=8.8)
+        images = paddle.to_tensor(data, dtype='int32')
+        out_2 = self._executed_api(images, min=2.2, max=8.9)
+        images = paddle.to_tensor(data, dtype='int32')
+        out_3 = self._executed_api(images, min=v_min, max=v_max)
+
+        out_4 = self._executed_api(
+            paddle.cast(images * 10, 'int32'), min=2.2, max=8.8
+        )
+        out_5 = self._executed_api(
+            paddle.cast(images * 10, 'int64'), min=2.2, max=8.8
+        )
+        # test with numpy.generic
+        out_6 = self._executed_api(images, min=np.abs(2.2), max=np.abs(8.8))
+
+        np.testing.assert_allclose(
+            out_1.numpy(), data.clip(2.2, 8.8), rtol=1e-05
+        )
+        np.testing.assert_allclose(
+            out_2.numpy(), data.clip(2.2, 8.9), rtol=1e-05
+        )
+        np.testing.assert_allclose(
+            out_3.numpy(), data.clip(2.2, 8.8), rtol=1e-05
+        )
+        np.testing.assert_allclose(
+            out_4.numpy(),
+            (data * 10).astype(np.int32).clip(2.2, 8.8),
+            rtol=1e-05,
+        )
+        np.testing.assert_allclose(
+            out_5.numpy(),
+            (data * 10).astype(np.int64).clip(2.2, 8.8),
+            rtol=1e-05,
+        )
+        np.testing.assert_allclose(
+            out_6.numpy(), data.clip(2.2, 8.8), rtol=1e-05
+        )
+
+
 class TestClipOpFp16(unittest.TestCase):
     def test_fp16(self):
         if base.core.is_compiled_with_cuda():

From b714fd8f966afc65a9ab543301f4cbeb482e6377 Mon Sep 17 00:00:00 2001
From: Hu Shenwei
Date: Tue, 19 Aug 2025 16:11:20 +0800
Subject: [PATCH 2/2] fix(math.py): fix output type diff for clip kernel

---
 python/paddle/tensor/math.py | 51 +++++++++++++++++++-----------------
 1 file changed, 27 insertions(+), 24 deletions(-)

diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py
index 5ba27590337bf7..43c2df548218a9 100644
--- a/python/paddle/tensor/math.py
+++ b/python/paddle/tensor/math.py
@@ -3728,20 +3728,6 @@ def clip(
     """
     x_dtype = str(x.dtype)
-    is_cast_x_to_fp32 = False
-    if x_dtype in ['paddle.int32', 'paddle.int64'] and (
-        isinstance(min, float)
-        or isinstance(max, float)
-        or (
-            isinstance(min, Variable)
-            and isinstance(min.item(0), float)
-            or (isinstance(max, Variable) and isinstance(max.item(0), float))
-        )
-    ):
-        is_cast_x_to_fp32 = True
-        x = paddle.cast(x, paddle.float32)
-        x_dtype = 'paddle.float32'
-
     if x_dtype == 'paddle.int32':
         min_ = np.iinfo(np.int32).min
         max_ = np.iinfo(np.int32).max - 2**7
     elif x_dtype == 'paddle.int64':
@@ -3757,14 +3743,33 @@ def clip(
     else:
         min_ = float(np.finfo(np.float32).min)
         max_ = float(np.finfo(np.float32).max)
+    min = min_ if min is None else min
+    max = max_ if max is None else max
 
-    if in_dynamic_or_pir_mode():
-        if isinstance(min, Variable):
-            min = min.item(0)
-        if isinstance(max, Variable):
-            max = max.item(0)
-        min = min_ if min is None else min
-        max = max_ if max is None else max
+    if in_dynamic_mode():
+        if x_dtype in ['paddle.int32', 'paddle.int64']:
+            if isinstance(min, paddle.Tensor):
+                min = min.item(0)
+            if isinstance(max, paddle.Tensor):
+                max = max.item(0)
+            if isinstance(min, float) or isinstance(max, float):
+                x = paddle.cast(x, paddle.float32)
+        return _C_ops.clip(x, min, max)
+    elif in_pir_mode():
+        if x_dtype in ['paddle.int32', 'paddle.int64']:
+            if (
+                isinstance(min, float)
+                or isinstance(max, float)
+                or (
+                    isinstance(min, paddle.pir.Value)
+                    and min.dtype in [paddle.float32, paddle.float64]
+                )
+                or (
+                    isinstance(max, paddle.pir.Value)
+                    and max.dtype in [paddle.float32, paddle.float64]
+                )
+            ):
+                x = paddle.cast(x, paddle.float32)
         return _C_ops.clip(x, min, max)
     else:
         if min is not None:
@@ -3812,9 +3817,7 @@ def clip(
 
     helper = LayerHelper('clip', **locals())
     output = helper.create_variable_for_type_inference(
-        dtype=(
-            helper.input_dtype('x') if not is_cast_x_to_fp32 else "float32"
-        )
+        dtype=helper.input_dtype('x')
    )
     helper.append_op(
         type='clip', inputs=inputs, outputs={'Out': [output]}, attrs=attrs
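
Reviewer note (not part of the patch series): a minimal sketch of the behavior
change, assuming a Paddle build with these patches applied. NumPy promotes an
integer array clipped with float bounds to a floating dtype; with this fix,
paddle.clip mirrors that by returning float32 whenever either bound is a
float, while integer bounds still preserve the input dtype:

    import numpy as np
    import paddle

    x_np = np.arange(6, dtype=np.int32)
    # NumPy: float bounds promote the result to a floating dtype.
    print(x_np.clip(1.5, 4.5).dtype)        # float64

    x = paddle.to_tensor(x_np)              # paddle.int32 tensor
    # With the series applied, float bounds yield a float32 output.
    out = paddle.clip(x, min=1.5, max=4.5)
    print(out.dtype)                        # paddle.float32 (previously paddle.int32)

    # Integer bounds keep the input dtype unchanged.
    out_int = paddle.clip(x, min=1, max=4)
    print(out_int.dtype)                    # paddle.int32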