diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py
index 3f0056e96ab..d26b21fbc7e 100644
--- a/test/test_transforms_v2.py
+++ b/test/test_transforms_v2.py
@@ -328,22 +328,6 @@ def test_auto_augment(self, transform, input):
     def test_normalize(self, transform, input):
         transform(input)
 
-    @parametrize(
-        [
-            (
-                transforms.RandomResizedCrop([16, 16], antialias=True),
-                itertools.chain(
-                    make_images(extra_dims=[(4,)]),
-                    make_vanilla_tensor_images(),
-                    make_pil_images(),
-                    make_videos(extra_dims=[()]),
-                ),
-            )
-        ]
-    )
-    def test_random_resized_crop(self, transform, input):
-        transform(input)
-
     @pytest.mark.parametrize(
         "flat_inputs",
diff --git a/test/test_transforms_v2_consistency.py b/test/test_transforms_v2_consistency.py
index 9badd8dbe1b..d2d5ff0c50a 100644
--- a/test/test_transforms_v2_consistency.py
+++ b/test/test_transforms_v2_consistency.py
@@ -252,30 +252,6 @@ def __init__(
         # Use default tolerances of `torch.testing.assert_close`
         closeness_kwargs=dict(rtol=None, atol=None),
     ),
-    ConsistencyConfig(
-        v2_transforms.RandomResizedCrop,
-        legacy_transforms.RandomResizedCrop,
-        [
-            ArgsKwargs(16),
-            ArgsKwargs(17, scale=(0.3, 0.7)),
-            ArgsKwargs(25, ratio=(0.5, 1.5)),
-            ArgsKwargs((31, 28), interpolation=v2_transforms.InterpolationMode.NEAREST),
-            ArgsKwargs((31, 28), interpolation=PIL.Image.NEAREST),
-            ArgsKwargs((29, 32), antialias=False),
-            ArgsKwargs((28, 31), antialias=True),
-        ],
-        # atol=1 due to Resize v2 is using native uint8 interpolate path for bilinear and nearest modes
-        closeness_kwargs=dict(rtol=0, atol=1),
-    ),
-    ConsistencyConfig(
-        v2_transforms.RandomResizedCrop,
-        legacy_transforms.RandomResizedCrop,
-        [
-            ArgsKwargs((33, 26), interpolation=v2_transforms.InterpolationMode.BICUBIC, antialias=True),
-            ArgsKwargs((33, 26), interpolation=PIL.Image.BICUBIC, antialias=True),
-        ],
-        closeness_kwargs=dict(rtol=0, atol=21),
-    ),
     ConsistencyConfig(
         v2_transforms.ColorJitter,
         legacy_transforms.ColorJitter,
@@ -535,7 +511,6 @@ def test_call_consistency(config, args_kwargs):
             id=transform_cls.__name__,
         )
         for transform_cls, get_params_args_kwargs in [
-            (v2_transforms.RandomResizedCrop, ArgsKwargs(make_image(), scale=[0.3, 0.7], ratio=[0.5, 1.5])),
             (v2_transforms.ColorJitter, ArgsKwargs(brightness=None, contrast=None, saturation=None, hue=None)),
             (v2_transforms.GaussianBlur, ArgsKwargs(0.3, 1.4)),
             (v2_transforms.RandomPerspective, ArgsKwargs(23, 17, 0.5)),
diff --git a/test/test_transforms_v2_functional.py b/test/test_transforms_v2_functional.py
index 8bbec4d8011..03b7656d739 100644
--- a/test/test_transforms_v2_functional.py
+++ b/test/test_transforms_v2_functional.py
@@ -1,5 +1,4 @@
 import inspect
-import math
 import os
 import re
 
@@ -526,88 +525,6 @@ def test_tv_tensor_explicit_metadata(self, metadata):
 # `transforms_v2_kernel_infos.py`
 
 
-def _compute_affine_matrix(angle_, translate_, scale_, shear_, center_):
-    rot = math.radians(angle_)
-    cx, cy = center_
-    tx, ty = translate_
-    sx, sy = [math.radians(sh_) for sh_ in shear_]
-
-    c_matrix = np.array([[1, 0, cx], [0, 1, cy], [0, 0, 1]])
-    t_matrix = np.array([[1, 0, tx], [0, 1, ty], [0, 0, 1]])
-    c_matrix_inv = np.linalg.inv(c_matrix)
-    rs_matrix = np.array(
-        [
-            [scale_ * math.cos(rot), -scale_ * math.sin(rot), 0],
-            [scale_ * math.sin(rot), scale_ * math.cos(rot), 0],
-            [0, 0, 1],
-        ]
-    )
-    shear_x_matrix = np.array([[1, -math.tan(sx), 0], [0, 1, 0], [0, 0, 1]])
-    shear_y_matrix = np.array([[1, 0, 0], [-math.tan(sy), 1, 0], [0, 0, 1]])
-    rss_matrix = np.matmul(rs_matrix, np.matmul(shear_y_matrix, shear_x_matrix))
-    true_matrix = np.matmul(t_matrix, np.matmul(c_matrix, np.matmul(rss_matrix, c_matrix_inv)))
-    return true_matrix
-
-
-@pytest.mark.parametrize("device", cpu_and_cuda())
-def test_correctness_vertical_flip_segmentation_mask_on_fixed_input(device):
-    mask = torch.zeros((3, 3, 3), dtype=torch.long, device=device)
-    mask[:, 0, :] = 1
-
-    out_mask = F.vertical_flip_mask(mask)
-
-    expected_mask = torch.zeros((3, 3, 3), dtype=torch.long, device=device)
-    expected_mask[:, -1, :] = 1
-    torch.testing.assert_close(out_mask, expected_mask)
-
-
-@pytest.mark.parametrize("device", cpu_and_cuda())
-@pytest.mark.parametrize(
-    "format",
-    [tv_tensors.BoundingBoxFormat.XYXY, tv_tensors.BoundingBoxFormat.XYWH, tv_tensors.BoundingBoxFormat.CXCYWH],
-)
-@pytest.mark.parametrize(
-    "top, left, height, width, size",
-    [
-        [0, 0, 30, 30, (60, 60)],
-        [-5, 5, 35, 45, (32, 34)],
-    ],
-)
-def test_correctness_resized_crop_bounding_boxes(device, format, top, left, height, width, size):
-    def _compute_expected_bbox(bbox, top_, left_, height_, width_, size_):
-        # bbox should be xyxy
-        bbox[0] = (bbox[0] - left_) * size_[1] / width_
-        bbox[1] = (bbox[1] - top_) * size_[0] / height_
-        bbox[2] = (bbox[2] - left_) * size_[1] / width_
-        bbox[3] = (bbox[3] - top_) * size_[0] / height_
-        return bbox
-
-    format = tv_tensors.BoundingBoxFormat.XYXY
-    canvas_size = (100, 100)
-    in_boxes = [
-        [10.0, 10.0, 20.0, 20.0],
-        [5.0, 10.0, 15.0, 20.0],
-    ]
-    expected_bboxes = []
-    for in_box in in_boxes:
-        expected_bboxes.append(_compute_expected_bbox(list(in_box), top, left, height, width, size))
-    expected_bboxes = torch.tensor(expected_bboxes, device=device)
-
-    in_boxes = tv_tensors.BoundingBoxes(
-        in_boxes, format=tv_tensors.BoundingBoxFormat.XYXY, canvas_size=canvas_size, device=device
-    )
-    if format != tv_tensors.BoundingBoxFormat.XYXY:
-        in_boxes = convert_bounding_box_format(in_boxes, tv_tensors.BoundingBoxFormat.XYXY, format)
-
-    output_boxes, output_canvas_size = F.resized_crop_bounding_boxes(in_boxes, format, top, left, height, width, size)
-
-    if format != tv_tensors.BoundingBoxFormat.XYXY:
-        output_boxes = convert_bounding_box_format(output_boxes, format, tv_tensors.BoundingBoxFormat.XYXY)
-
-    torch.testing.assert_close(output_boxes, expected_bboxes)
-    torch.testing.assert_close(output_canvas_size, size)
-
-
 def _parse_padding(padding):
     if isinstance(padding, int):
         return [padding] * 4
diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py
index 267e5ab85f5..1650e2ca7f9 100644
--- a/test/test_transforms_v2_refactored.py
+++ b/test/test_transforms_v2_refactored.py
@@ -3110,3 +3110,136 @@ def test_errors(self):
             F.convert_bounding_box_format(
                 input_tv_tensor, old_format=input_tv_tensor.format, new_format=input_tv_tensor.format
             )
+
+
+class TestResizedCrop:
+    INPUT_SIZE = (17, 11)
+    CROP_KWARGS = dict(top=2, left=2, height=5, width=7)
+    OUTPUT_SIZE = (19, 32)
+
+    @pytest.mark.parametrize(
+        ("kernel", "make_input"),
+        [
+            (F.resized_crop_image, make_image),
+            (F.resized_crop_bounding_boxes, make_bounding_boxes),
+            (F.resized_crop_mask, make_segmentation_mask),
+            (F.resized_crop_mask, make_detection_mask),
+            (F.resized_crop_video, make_video),
+        ],
+    )
+    def test_kernel(self, kernel, make_input):
+        input = make_input(self.INPUT_SIZE)
+        if isinstance(input, tv_tensors.BoundingBoxes):
+            extra_kwargs = dict(format=input.format)
+        elif isinstance(input, tv_tensors.Mask):
+            extra_kwargs = dict()
+        else:
+            extra_kwargs = dict(antialias=True)
+
+        check_kernel(kernel, input, **self.CROP_KWARGS, size=self.OUTPUT_SIZE, **extra_kwargs)
+
+    @pytest.mark.parametrize(
+        "make_input",
+        [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
+    )
+    def test_functional(self, make_input):
+        check_functional(
+            F.resized_crop, make_input(self.INPUT_SIZE), **self.CROP_KWARGS, size=self.OUTPUT_SIZE, antialias=True
+        )
+
+    @pytest.mark.parametrize(
+        ("kernel", "input_type"),
+        [
+            (F.resized_crop_image, torch.Tensor),
+            (F._resized_crop_image_pil, PIL.Image.Image),
+            (F.resized_crop_image, tv_tensors.Image),
+            (F.resized_crop_bounding_boxes, tv_tensors.BoundingBoxes),
+            (F.resized_crop_mask, tv_tensors.Mask),
+            (F.resized_crop_video, tv_tensors.Video),
+        ],
+    )
+    def test_functional_signature(self, kernel, input_type):
+        check_functional_kernel_signature_match(F.resized_crop, kernel=kernel, input_type=input_type)
+
+    @param_value_parametrization(
+        scale=[(0.1, 0.2), [0.0, 1.0]],
+        ratio=[(0.3, 0.7), [0.1, 5.0]],
+    )
+    @pytest.mark.parametrize(
+        "make_input",
+        [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
+    )
+    def test_transform(self, param, value, make_input):
+        check_transform(
+            transforms.RandomResizedCrop(size=self.OUTPUT_SIZE, **{param: value}, antialias=True),
+            make_input(self.INPUT_SIZE),
+            check_v1_compatibility=dict(rtol=0, atol=1),
+        )
+
+    # `InterpolationMode.NEAREST` is modeled after the buggy `INTER_NEAREST` interpolation of CV2.
+    # The PIL equivalent of `InterpolationMode.NEAREST` is `InterpolationMode.NEAREST_EXACT`
+    @pytest.mark.parametrize("interpolation", set(INTERPOLATION_MODES) - {transforms.InterpolationMode.NEAREST})
+    def test_functional_image_correctness(self, interpolation):
+        image = make_image(self.INPUT_SIZE, dtype=torch.uint8)
+
+        actual = F.resized_crop(
+            image, **self.CROP_KWARGS, size=self.OUTPUT_SIZE, interpolation=interpolation, antialias=True
+        )
+        expected = F.to_image(
+            F.resized_crop(
+                F.to_pil_image(image), **self.CROP_KWARGS, size=self.OUTPUT_SIZE, interpolation=interpolation
+            )
+        )
+
+        torch.testing.assert_close(actual, expected, atol=1, rtol=0)
+
+    def _reference_resized_crop_bounding_boxes(self, bounding_boxes, *, top, left, height, width, size):
+        new_height, new_width = size
+
+        crop_affine_matrix = np.array(
+            [
+                [1, 0, -left],
+                [0, 1, -top],
+                [0, 0, 1],
+            ],
+        )
+        resize_affine_matrix = np.array(
+            [
+                [new_width / width, 0, 0],
+                [0, new_height / height, 0],
+                [0, 0, 1],
+            ],
+        )
+        affine_matrix = (resize_affine_matrix @ crop_affine_matrix)[:2, :]
+
+        return reference_affine_bounding_boxes_helper(
+            bounding_boxes,
+            affine_matrix=affine_matrix,
+            new_canvas_size=size,
+        )
+
+    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    def test_functional_bounding_boxes_correctness(self, format):
+        bounding_boxes = make_bounding_boxes(self.INPUT_SIZE, format=format)
+
+        actual = F.resized_crop(bounding_boxes, **self.CROP_KWARGS, size=self.OUTPUT_SIZE)
+        expected = self._reference_resized_crop_bounding_boxes(
+            bounding_boxes, **self.CROP_KWARGS, size=self.OUTPUT_SIZE
+        )
+
+        assert_equal(actual, expected)
+        assert_equal(F.get_size(actual), F.get_size(expected))
+
+    def test_transform_errors_warnings(self):
+        with pytest.raises(ValueError, match="provide only two dimensions"):
+            transforms.RandomResizedCrop(size=(1, 2, 3))
+
+        with pytest.raises(TypeError, match="Scale should be a sequence"):
+            transforms.RandomResizedCrop(size=self.INPUT_SIZE, scale=123)
+
+        with pytest.raises(TypeError, match="Ratio should be a sequence"):
+            transforms.RandomResizedCrop(size=self.INPUT_SIZE, ratio=123)
+
+        for param in ["scale", "ratio"]:
+            with pytest.warns(match="Scale and ratio should be of kind"):
+                transforms.RandomResizedCrop(size=self.INPUT_SIZE, **{param: [1, 0]})
diff --git a/test/transforms_v2_dispatcher_infos.py b/test/transforms_v2_dispatcher_infos.py
index a422e08fe2e..592fce4e17f 100644
--- a/test/transforms_v2_dispatcher_infos.py
+++ b/test/transforms_v2_dispatcher_infos.py
@@ -111,16 +111,6 @@ def xfail_jit_python_scalar_arg(name, *, reason=None):
 
 
 DISPATCHER_INFOS = [
-    DispatcherInfo(
-        F.resized_crop,
-        kernels={
-            tv_tensors.Image: F.resized_crop_image,
-            tv_tensors.Video: F.resized_crop_video,
-            tv_tensors.BoundingBoxes: F.resized_crop_bounding_boxes,
-            tv_tensors.Mask: F.resized_crop_mask,
-        },
-        pil_kernel_info=PILKernelInfo(F._resized_crop_image_pil),
-    ),
     DispatcherInfo(
         F.pad,
         kernels={
diff --git a/test/transforms_v2_kernel_infos.py b/test/transforms_v2_kernel_infos.py
index e84513d2ff1..65e66240e50 100644
--- a/test/transforms_v2_kernel_infos.py
+++ b/test/transforms_v2_kernel_infos.py
@@ -226,92 +226,6 @@ def transform(bbox, affine_matrix_, format_, canvas_size_):
     ).reshape(bounding_boxes.shape)
 
 
-_RESIZED_CROP_PARAMS = combinations_grid(top=[-8, 9], left=[-8, 9], height=[12], width=[12], size=[(16, 18)])
-
-
-def sample_inputs_resized_crop_image_tensor():
-    for image_loader in make_image_loaders():
-        yield ArgsKwargs(image_loader, **_RESIZED_CROP_PARAMS[0])
-
-
-@pil_reference_wrapper
-def reference_resized_crop_image_tensor(*args, **kwargs):
-    if not kwargs.pop("antialias", False) and kwargs.get("interpolation", F.InterpolationMode.BILINEAR) in {
-        F.InterpolationMode.BILINEAR,
-        F.InterpolationMode.BICUBIC,
-    }:
-        raise pytest.UsageError("Anti-aliasing is always active in PIL")
-    return F._resized_crop_image_pil(*args, **kwargs)
-
-
-def reference_inputs_resized_crop_image_tensor():
-    for image_loader, interpolation, params in itertools.product(
-        make_image_loaders_for_interpolation(),
-        [
-            F.InterpolationMode.NEAREST,
-            F.InterpolationMode.NEAREST_EXACT,
-            F.InterpolationMode.BILINEAR,
-            F.InterpolationMode.BICUBIC,
-        ],
-        _RESIZED_CROP_PARAMS,
-    ):
-        yield ArgsKwargs(
-            image_loader,
-            interpolation=interpolation,
-            antialias=interpolation
-            in {
-                F.InterpolationMode.BILINEAR,
-                F.InterpolationMode.BICUBIC,
-            },
-            **params,
-        )
-
-
-def sample_inputs_resized_crop_bounding_boxes():
-    for bounding_boxes_loader in make_bounding_box_loaders():
-        yield ArgsKwargs(bounding_boxes_loader, format=bounding_boxes_loader.format, **_RESIZED_CROP_PARAMS[0])
-
-
-def sample_inputs_resized_crop_mask():
-    for mask_loader in make_mask_loaders():
-        yield ArgsKwargs(mask_loader, **_RESIZED_CROP_PARAMS[0])
-
-
-def sample_inputs_resized_crop_video():
-    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
-        yield ArgsKwargs(video_loader, **_RESIZED_CROP_PARAMS[0])
-
-
-KERNEL_INFOS.extend(
-    [
-        KernelInfo(
-            F.resized_crop_image,
-            sample_inputs_fn=sample_inputs_resized_crop_image_tensor,
-            reference_fn=reference_resized_crop_image_tensor,
-            reference_inputs_fn=reference_inputs_resized_crop_image_tensor,
-            float32_vs_uint8=True,
-            closeness_kwargs={
-                **cuda_vs_cpu_pixel_difference(),
-                **pil_reference_pixel_difference(3, mae=True),
-                **float32_vs_uint8_pixel_difference(3, mae=True),
-            },
-        ),
-        KernelInfo(
-            F.resized_crop_bounding_boxes,
-            sample_inputs_fn=sample_inputs_resized_crop_bounding_boxes,
-        ),
-        KernelInfo(
-            F.resized_crop_mask,
-            sample_inputs_fn=sample_inputs_resized_crop_mask,
-        ),
-        KernelInfo(
-            F.resized_crop_video,
-            sample_inputs_fn=sample_inputs_resized_crop_video,
-            closeness_kwargs=cuda_vs_cpu_pixel_difference(),
-        ),
-    ]
-)
-
 _PAD_PARAMS = combinations_grid(
     padding=[[1], [1, 1], [1, 1, 2, 2]],
     padding_mode=["constant", "symmetric", "edge", "reflect"],