diff --git a/docs/source/datapoints.rst b/docs/source/datapoints.rst index 55d3cda4a8c..90ab3375adb 100644 --- a/docs/source/datapoints.rst +++ b/docs/source/datapoints.rst @@ -14,6 +14,6 @@ see e.g. :ref:`sphx_glr_auto_examples_plot_transforms_v2_e2e.py`. Image Video - BoundingBoxFormat - BoundingBoxes + BBoxFormat + BBoxes Mask diff --git a/docs/source/models.rst b/docs/source/models.rst index 15540778602..f9e7963e221 100644 --- a/docs/source/models.rst +++ b/docs/source/models.rst @@ -413,7 +413,7 @@ Here is an example of how to use the pre-trained object detection models: from torchvision.io.image import read_image from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights - from torchvision.utils import draw_bounding_boxes + from torchvision.utils import draw_bboxes from torchvision.transforms.functional import to_pil_image img = read_image("test/assets/encode_jpeg/grace_hopper_517x606.jpg") @@ -432,7 +432,7 @@ Here is an example of how to use the pre-trained object detection models: # Step 4: Use the model and visualize the prediction prediction = model(batch)[0] labels = [weights.meta["categories"][i] for i in prediction["labels"]] - box = draw_bounding_boxes(img, boxes=prediction["boxes"], + box = draw_bboxes(img, boxes=prediction["boxes"], labels=labels, colors="red", width=4, font_size=30) diff --git a/docs/source/transforms.rst b/docs/source/transforms.rst index 9f3efe30341..874156a6041 100644 --- a/docs/source/transforms.rst +++ b/docs/source/transforms.rst @@ -206,8 +206,8 @@ Miscellaneous v2.RandomErasing Lambda v2.Lambda - v2.SanitizeBoundingBoxes - v2.ClampBoundingBoxes + v2.SanitizeBBoxes + v2.ClampBBoxes v2.UniformTemporalSubsample .. _conversion_transforms: @@ -236,7 +236,7 @@ Conversion ConvertImageDtype v2.ConvertImageDtype v2.ToDtype - v2.ConvertBoundingBoxFormat + v2.ConvertBBoxFormat Auto-Augmentation ----------------- diff --git a/docs/source/utils.rst b/docs/source/utils.rst index 971381a658f..ee614f6e67f 100644 --- a/docs/source/utils.rst +++ b/docs/source/utils.rst @@ -12,7 +12,7 @@ visualization `. 
:toctree: generated/ :template: function.rst - draw_bounding_boxes + draw_bboxes draw_segmentation_masks draw_keypoints flow_to_image diff --git a/references/detection/presets.py b/references/detection/presets.py index 098ec85e690..0b8d1124d19 100644 --- a/references/detection/presets.py +++ b/references/detection/presets.py @@ -77,8 +77,8 @@ def __init__( if use_v2: transforms += [ - T.ConvertBoundingBoxFormat(datapoints.BoundingBoxFormat.XYXY), - T.SanitizeBoundingBoxes(), + T.ConvertBBoxFormat(datapoints.BBoxFormat.XYXY), + T.SanitizeBBoxes(), ] self.transforms = T.Compose(transforms) diff --git a/test/builtin_dataset_mocks.py b/test/builtin_dataset_mocks.py index ef5d5e1ec96..26f2a3076ad 100644 --- a/test/builtin_dataset_mocks.py +++ b/test/builtin_dataset_mocks.py @@ -925,7 +925,7 @@ def _make_attributes_file(cls, root, image_file_names): cls._make_ann_file(root, "list_attr_celeba.txt", data, field_names=(*field_names, "")) @classmethod - def _make_bounding_boxes_file(cls, root, image_file_names): + def _make_bboxes_file(cls, root, image_file_names): field_names = ("image_id", "x_1", "y_1", "width", "height") data = [ [f"{name} ", *[f"{coord:3d}" for coord in make_tensor((4,), low=0, dtype=torch.int).tolist()]] @@ -960,7 +960,7 @@ def generate(cls, root): for make_ann_file_fn in ( cls._make_identity_file, cls._make_attributes_file, - cls._make_bounding_boxes_file, + cls._make_bboxes_file, cls._make_landmarks_file, ): make_ann_file_fn(root, image_file_names) @@ -1342,7 +1342,7 @@ def _make_archive(cls, root): with open(archive_folder / "train_test_split.txt", "w") as file: file.write("\n".join(f"{image_id} {split_id}" for image_id, split_id in zip(image_ids, split_ids))) - with open(archive_folder / "bounding_boxes.txt", "w") as file: + with open(archive_folder / "bboxes.txt", "w") as file: file.write( "\n".join( " ".join( diff --git a/test/common_utils.py b/test/common_utils.py index c9cff035cac..555cd5612db 100644 --- a/test/common_utils.py +++ b/test/common_utils.py @@ -620,15 +620,15 @@ def make_image_loaders_for_interpolation( @dataclasses.dataclass -class BoundingBoxesLoader(TensorLoader): - format: datapoints.BoundingBoxFormat +class BBoxesLoader(TensorLoader): + format: datapoints.BBoxFormat spatial_size: Tuple[int, int] -def make_bounding_box( +def make_bbox( size=None, *, - format=datapoints.BoundingBoxFormat.XYXY, + format=datapoints.BBoxFormat.XYXY, spatial_size=None, batch_dims=(), dtype=None, @@ -639,7 +639,7 @@ def make_bounding_box( - (box[3] - box[1], box[2] - box[0]) for XYXY - (H, W) for XYWH and CXCYWH spatial_size: Size of the reference object, e.g. an image. Corresponds to the .spatial_size attribute on - returned datapoints.BoundingBoxes + returned datapoints.BBoxes To generate a valid joint sample, you need to set spatial_size here to the same value as size on the other maker functions, e.g. @@ -647,8 +647,8 @@ def make_bounding_box( .. code:: image = make_image=(size=size) - bounding_boxes = make_bounding_box(spatial_size=size) - assert F.get_spatial_size(bounding_boxes) == F.get_spatial_size(image) + bboxes = make_bbox(spatial_size=size) + assert F.get_spatial_size(bboxes) == F.get_spatial_size(image) For convenience, if both size and spatial_size are omitted, spatial_size defaults to the same value as size for all other maker functions, e.g. @@ -656,8 +656,8 @@ def make_bounding_box( .. 
code:: image = make_image=() - bounding_boxes = make_bounding_box() - assert F.get_spatial_size(bounding_boxes) == F.get_spatial_size(image) + bboxes = make_bbox() + assert F.get_spatial_size(bboxes) == F.get_spatial_size(image) """ def sample_position(values, max_value): @@ -666,7 +666,7 @@ def sample_position(values, max_value): return torch.stack([torch.randint(max_value - v, ()) for v in values.flatten().tolist()]).reshape(values.shape) if isinstance(format, str): - format = datapoints.BoundingBoxFormat[format] + format = datapoints.BBoxFormat[format] if spatial_size is None: if size is None: @@ -679,7 +679,7 @@ def sample_position(values, max_value): dtype = dtype or torch.float32 if any(dim == 0 for dim in batch_dims): - return datapoints.BoundingBoxes( + return datapoints.BBoxes( torch.empty(*batch_dims, 4, dtype=dtype, device=device), format=format, spatial_size=spatial_size ) @@ -691,28 +691,28 @@ def sample_position(values, max_value): y = sample_position(h, spatial_size[0]) x = sample_position(w, spatial_size[1]) - if format is datapoints.BoundingBoxFormat.XYWH: + if format is datapoints.BBoxFormat.XYWH: parts = (x, y, w, h) - elif format is datapoints.BoundingBoxFormat.XYXY: + elif format is datapoints.BBoxFormat.XYXY: x1, y1 = x, y x2 = x1 + w y2 = y1 + h parts = (x1, y1, x2, y2) - elif format is datapoints.BoundingBoxFormat.CXCYWH: + elif format is datapoints.BBoxFormat.CXCYWH: cx = x + w / 2 cy = y + h / 2 parts = (cx, cy, w, h) else: raise ValueError(f"Format {format} is not supported") - return datapoints.BoundingBoxes( + return datapoints.BBoxes( torch.stack(parts, dim=-1).to(dtype=dtype, device=device), format=format, spatial_size=spatial_size ) -def make_bounding_box_loader(*, extra_dims=(), format, spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE, dtype=torch.float32): +def make_bbox_loader(*, extra_dims=(), format, spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE, dtype=torch.float32): if isinstance(format, str): - format = datapoints.BoundingBoxFormat[format] + format = datapoints.BBoxFormat[format] spatial_size = _parse_spatial_size(spatial_size, name="spatial_size") @@ -721,25 +721,23 @@ def fn(shape, dtype, device): if num_coordinates != 4: raise pytest.UsageError() - return make_bounding_box( - format=format, spatial_size=spatial_size, batch_dims=batch_dims, dtype=dtype, device=device - ) + return make_bbox(format=format, spatial_size=spatial_size, batch_dims=batch_dims, dtype=dtype, device=device) - return BoundingBoxesLoader(fn, shape=(*extra_dims, 4), dtype=dtype, format=format, spatial_size=spatial_size) + return BBoxesLoader(fn, shape=(*extra_dims, 4), dtype=dtype, format=format, spatial_size=spatial_size) -def make_bounding_box_loaders( +def make_bbox_loaders( *, extra_dims=DEFAULT_EXTRA_DIMS, - formats=tuple(datapoints.BoundingBoxFormat), + formats=tuple(datapoints.BBoxFormat), spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE, dtypes=(torch.float32, torch.float64, torch.int64), ): for params in combinations_grid(extra_dims=extra_dims, format=formats, dtype=dtypes): - yield make_bounding_box_loader(**params, spatial_size=spatial_size) + yield make_bbox_loader(**params, spatial_size=spatial_size) -make_bounding_boxes = from_loaders(make_bounding_box_loaders) +make_bboxes = from_loaders(make_bbox_loaders) class MaskLoader(TensorLoader): diff --git a/test/test_datapoints.py b/test/test_datapoints.py index a5f09043582..abb22a2134a 100644 --- a/test/test_datapoints.py +++ b/test/test_datapoints.py @@ -23,15 +23,13 @@ def test_mask_instance(data): @pytest.mark.parametrize("data", 
[torch.randint(0, 32, size=(5, 4)), [[0, 0, 5, 5], [2, 2, 7, 7]]]) -@pytest.mark.parametrize( - "format", ["XYXY", "CXCYWH", datapoints.BoundingBoxFormat.XYXY, datapoints.BoundingBoxFormat.XYWH] -) +@pytest.mark.parametrize("format", ["XYXY", "CXCYWH", datapoints.BBoxFormat.XYXY, datapoints.BBoxFormat.XYWH]) def test_bbox_instance(data, format): - bboxes = datapoints.BoundingBoxes(data, format=format, spatial_size=(32, 32)) + bboxes = datapoints.BBoxes(data, format=format, spatial_size=(32, 32)) assert isinstance(bboxes, torch.Tensor) assert bboxes.ndim == 2 and bboxes.shape[1] == 4 if isinstance(format, str): - format = datapoints.BoundingBoxFormat[(format.upper())] + format = datapoints.BBoxFormat[(format.upper())] assert bboxes.format == format @@ -164,7 +162,7 @@ def test_wrap_like(): [ datapoints.Image(torch.rand(3, 16, 16)), datapoints.Video(torch.rand(2, 3, 16, 16)), - datapoints.BoundingBoxes([0.0, 1.0, 2.0, 3.0], format=datapoints.BoundingBoxFormat.XYXY, spatial_size=(10, 10)), + datapoints.BBoxes([0.0, 1.0, 2.0, 3.0], format=datapoints.BBoxFormat.XYXY, spatial_size=(10, 10)), datapoints.Mask(torch.randint(0, 256, (16, 16), dtype=torch.uint8)), ], ) diff --git a/test/test_prototype_transforms.py b/test/test_prototype_transforms.py index d1f24410703..c5ae7c6de93 100644 --- a/test/test_prototype_transforms.py +++ b/test/test_prototype_transforms.py @@ -9,7 +9,7 @@ from common_utils import ( assert_equal, DEFAULT_EXTRA_DIMS, - make_bounding_box, + make_bbox, make_detection_mask, make_image, make_images, @@ -20,7 +20,7 @@ from prototype_common_utils import make_label, make_one_hot_labels -from torchvision.datapoints import BoundingBoxes, BoundingBoxFormat, Image, Mask, Video +from torchvision.datapoints import BBoxes, BBoxFormat, Image, Mask, Video from torchvision.prototype import datapoints, transforms from torchvision.transforms.v2._utils import _convert_fill_arg from torchvision.transforms.v2.functional import InterpolationMode, pil_to_tensor, to_image_pil @@ -78,7 +78,7 @@ def test_mixup_cutmix(transform, input): input_copy = dict(input) for unsup_data in [ make_label(), - make_bounding_box(format="XYXY"), + make_bbox(format="XYXY"), make_detection_mask(), make_segmentation_mask(), ]: @@ -101,10 +101,10 @@ def test__extract_image_targets_assertion(self, mocker): self.create_fake_image(mocker, Image), # labels, bboxes, masks mocker.MagicMock(spec=datapoints.Label), - mocker.MagicMock(spec=BoundingBoxes), + mocker.MagicMock(spec=BBoxes), mocker.MagicMock(spec=Mask), # labels, bboxes, masks - mocker.MagicMock(spec=BoundingBoxes), + mocker.MagicMock(spec=BBoxes), mocker.MagicMock(spec=Mask), ] @@ -122,11 +122,11 @@ def test__extract_image_targets(self, image_type, label_type, mocker): self.create_fake_image(mocker, image_type), # labels, bboxes, masks mocker.MagicMock(spec=label_type), - mocker.MagicMock(spec=BoundingBoxes), + mocker.MagicMock(spec=BBoxes), mocker.MagicMock(spec=Mask), # labels, bboxes, masks mocker.MagicMock(spec=label_type), - mocker.MagicMock(spec=BoundingBoxes), + mocker.MagicMock(spec=BBoxes), mocker.MagicMock(spec=Mask), ] @@ -142,7 +142,7 @@ def test__extract_image_targets(self, image_type, label_type, mocker): for target in targets: for key, type_ in [ - ("boxes", BoundingBoxes), + ("boxes", BBoxes), ("masks", Mask), ("labels", label_type), ]: @@ -163,7 +163,7 @@ def test__copy_paste(self, label_type): if label_type == datapoints.OneHotLabel: labels = torch.nn.functional.one_hot(labels, num_classes=5) target = { - "boxes": BoundingBoxes( + "boxes": BBoxes( 
torch.tensor([[2.0, 3.0, 8.0, 9.0], [20.0, 20.0, 30.0, 30.0]]), format="XYXY", spatial_size=(32, 32) ), "masks": Mask(masks), @@ -178,7 +178,7 @@ def test__copy_paste(self, label_type): if label_type == datapoints.OneHotLabel: paste_labels = torch.nn.functional.one_hot(paste_labels, num_classes=5) paste_target = { - "boxes": BoundingBoxes( + "boxes": BBoxes( torch.tensor([[12.0, 13.0, 19.0, 18.0], [1.0, 15.0, 8.0, 19.0]]), format="XYXY", spatial_size=(32, 32) ), "masks": Mask(paste_masks), @@ -216,7 +216,7 @@ def test__get_params(self, mocker): flat_inputs = [ make_image(size=spatial_size, color_space="RGB"), - make_bounding_box(format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=batch_shape), + make_bbox(format=BBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=batch_shape), ] params = transform._get_params(flat_inputs) @@ -311,9 +311,7 @@ def test__transform_culling(self, mocker): ), ) - bounding_boxes = make_bounding_box( - format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=(batch_size,) - ) + bboxes = make_bbox(format=BBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=(batch_size,)) masks = make_detection_mask(size=spatial_size, batch_dims=(batch_size,)) labels = make_label(extra_dims=(batch_size,)) @@ -322,17 +320,17 @@ def test__transform_culling(self, mocker): output = transform( dict( - bounding_boxes=bounding_boxes, + bboxes=bboxes, masks=masks, labels=labels, ) ) - assert_equal(output["bounding_boxes"], bounding_boxes[is_valid]) + assert_equal(output["bboxes"], bboxes[is_valid]) assert_equal(output["masks"], masks[is_valid]) assert_equal(output["labels"], labels[is_valid]) - def test__transform_bounding_boxes_clamping(self, mocker): + def test__transform_bboxes_clamping(self, mocker): batch_size = 3 spatial_size = (10, 10) @@ -349,15 +347,13 @@ def test__transform_bounding_boxes_clamping(self, mocker): ), ) - bounding_boxes = make_bounding_box( - format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=(batch_size,) - ) - mock = mocker.patch("torchvision.prototype.transforms._geometry.F.clamp_bounding_boxes") + bboxes = make_bbox(format=BBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=(batch_size,)) + mock = mocker.patch("torchvision.prototype.transforms._geometry.F.clamp_bboxes") transform = transforms.FixedSizeCrop((-1, -1)) mocker.patch("torchvision.prototype.transforms._geometry.has_any", return_value=True) - transform(bounding_boxes) + transform(bboxes) mock.assert_called_once() @@ -390,7 +386,7 @@ class TestPermuteDimensions: def test_call(self, dims, inverse_dims): sample = dict( image=make_image(), - bounding_boxes=make_bounding_box(format=BoundingBoxFormat.XYXY), + bboxes=make_bbox(format=BBoxFormat.XYXY), video=make_video(), str="str", int=0, @@ -434,7 +430,7 @@ class TestTransposeDimensions: def test_call(self, dims): sample = dict( image=make_image(), - bounding_boxes=make_bounding_box(format=BoundingBoxFormat.XYXY), + bboxes=make_bbox(format=BBoxFormat.XYXY), video=make_video(), str="str", int=0, @@ -496,7 +492,7 @@ def make_datapoints(): pil_image = to_image_pil(make_image(size=size, color_space="RGB")) target = { - "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), + "boxes": make_bbox(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), "masks": make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long), } @@ -505,7 +501,7 @@ def 
make_datapoints(): tensor_image = torch.Tensor(make_image(size=size, color_space="RGB")) target = { - "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), + "boxes": make_bbox(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), "masks": make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long), } @@ -514,7 +510,7 @@ def make_datapoints(): datapoint_image = make_image(size=size, color_space="RGB") target = { - "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), + "boxes": make_bbox(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), "masks": make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long), } diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index e5624d78fed..0e1bce81560 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -16,8 +16,8 @@ assert_equal, assert_run_python_script, cpu_and_cuda, - make_bounding_box, - make_bounding_boxes, + make_bbox, + make_bboxes, make_detection_mask, make_image, make_images, @@ -45,9 +45,9 @@ def make_pil_images(*args, **kwargs): yield to_pil_image(image) -def make_vanilla_tensor_bounding_boxes(*args, **kwargs): - for bounding_boxes in make_bounding_boxes(*args, **kwargs): - yield bounding_boxes.data +def make_vanilla_tensor_bboxes(*args, **kwargs): + for bboxes in make_bboxes(*args, **kwargs): + yield bboxes.data def parametrize(transforms_with_inputs): @@ -69,7 +69,7 @@ def auto_augment_adapter(transform, input, device): adapted_input = {} image_or_video_found = False for key, value in input.items(): - if isinstance(value, (datapoints.BoundingBoxes, datapoints.Mask)): + if isinstance(value, (datapoints.BBoxes, datapoints.Mask)): # AA transforms don't support bounding boxes or masks continue elif check_type(value, (datapoints.Image, datapoints.Video, is_simple_tensor, PIL.Image.Image)): @@ -143,8 +143,8 @@ class TestSmoke: (transforms.RandomZoomOut(p=1.0), None), (transforms.Resize([16, 16], antialias=True), None), (transforms.ScaleJitter((16, 16), scale_range=(0.8, 1.2), antialias=True), None), - (transforms.ClampBoundingBoxes(), None), - (transforms.ConvertBoundingBoxFormat(datapoints.BoundingBoxFormat.CXCYWH), None), + (transforms.ClampBBoxes(), None), + (transforms.ConvertBBoxFormat(datapoints.BBoxFormat.CXCYWH), None), (transforms.ConvertImageDtype(), None), (transforms.GaussianBlur(kernel_size=3), None), ( @@ -180,16 +180,10 @@ def test_common(self, transform, adapter, container_type, image_or_video, device image_datapoint=make_image(size=spatial_size), video_datapoint=make_video(size=spatial_size), image_pil=next(make_pil_images(sizes=[spatial_size], color_spaces=["RGB"])), - bounding_boxes_xyxy=make_bounding_box( - format=datapoints.BoundingBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=(3,) - ), - bounding_boxes_xywh=make_bounding_box( - format=datapoints.BoundingBoxFormat.XYWH, spatial_size=spatial_size, batch_dims=(4,) - ), - bounding_boxes_cxcywh=make_bounding_box( - format=datapoints.BoundingBoxFormat.CXCYWH, spatial_size=spatial_size, batch_dims=(5,) - ), - bounding_boxes_degenerate_xyxy=datapoints.BoundingBoxes( + bboxes_xyxy=make_bbox(format=datapoints.BBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=(3,)), + 
bboxes_xywh=make_bbox(format=datapoints.BBoxFormat.XYWH, spatial_size=spatial_size, batch_dims=(4,)), + bboxes_cxcywh=make_bbox(format=datapoints.BBoxFormat.CXCYWH, spatial_size=spatial_size, batch_dims=(5,)), + bboxes_degenerate_xyxy=datapoints.BBoxes( [ [0, 0, 0, 0], # no height or width [0, 0, 0, 1], # no height @@ -198,10 +192,10 @@ def test_common(self, transform, adapter, container_type, image_or_video, device [0, 2, 1, 1], # x1 < x2, y1 > y2 [2, 2, 1, 1], # x1 > x2, y1 > y2 ], - format=datapoints.BoundingBoxFormat.XYXY, + format=datapoints.BBoxFormat.XYXY, spatial_size=spatial_size, ), - bounding_boxes_degenerate_xywh=datapoints.BoundingBoxes( + bboxes_degenerate_xywh=datapoints.BBoxes( [ [0, 0, 0, 0], # no height or width [0, 0, 0, 1], # no height @@ -210,10 +204,10 @@ def test_common(self, transform, adapter, container_type, image_or_video, device [0, 0, -1, 1], # negative width [0, 0, -1, -1], # negative height and width ], - format=datapoints.BoundingBoxFormat.XYWH, + format=datapoints.BBoxFormat.XYWH, spatial_size=spatial_size, ), - bounding_boxes_degenerate_cxcywh=datapoints.BoundingBoxes( + bboxes_degenerate_cxcywh=datapoints.BBoxes( [ [0, 0, 0, 0], # no height or width [0, 0, 0, 1], # no height @@ -222,7 +216,7 @@ def test_common(self, transform, adapter, container_type, image_or_video, device [0, 0, -1, 1], # negative width [0, 0, -1, -1], # negative height and width ], - format=datapoints.BoundingBoxFormat.CXCYWH, + format=datapoints.BBoxFormat.CXCYWH, spatial_size=spatial_size, ), detection_mask=make_detection_mask(size=spatial_size), @@ -261,20 +255,18 @@ def test_common(self, transform, adapter, container_type, image_or_video, device else: assert output_item is input_item - if isinstance(input_item, datapoints.BoundingBoxes) and not isinstance( - transform, transforms.ConvertBoundingBoxFormat - ): + if isinstance(input_item, datapoints.BBoxes) and not isinstance(transform, transforms.ConvertBBoxFormat): assert output_item.format == input_item.format # Enforce that the transform does not turn a degenerate box marked by RandomIoUCrop (or any other future # transform that does this), back into a valid one. 
# TODO: we should test that against all degenerate boxes above - for format in list(datapoints.BoundingBoxFormat): + for format in list(datapoints.BBoxFormat): sample = dict( - boxes=datapoints.BoundingBoxes([[0, 0, 0, 0]], format=format, spatial_size=(224, 244)), + boxes=datapoints.BBoxes([[0, 0, 0, 0]], format=format, spatial_size=(224, 244)), labels=torch.tensor([3]), ) - assert transforms.SanitizeBoundingBoxes()(sample)["boxes"].shape == (0, 4) + assert transforms.SanitizeBBoxes()(sample)["boxes"].shape == (0, 4) @parametrize( [ @@ -942,7 +934,7 @@ def test__transform(self, mocker, p): class TestTransform: @pytest.mark.parametrize( "inpt_type", - [torch.Tensor, PIL.Image.Image, datapoints.Image, np.ndarray, datapoints.BoundingBoxes, str, int], + [torch.Tensor, PIL.Image.Image, datapoints.Image, np.ndarray, datapoints.BBoxes, str, int], ) def test_check_transformed_types(self, inpt_type, mocker): # This test ensures that we correctly handle which types to transform and which to bypass @@ -960,7 +952,7 @@ def test_check_transformed_types(self, inpt_type, mocker): class TestToImageTensor: @pytest.mark.parametrize( "inpt_type", - [torch.Tensor, PIL.Image.Image, datapoints.Image, np.ndarray, datapoints.BoundingBoxes, str, int], + [torch.Tensor, PIL.Image.Image, datapoints.Image, np.ndarray, datapoints.BBoxes, str, int], ) def test__transform(self, inpt_type, mocker): fn = mocker.patch( @@ -971,7 +963,7 @@ def test__transform(self, inpt_type, mocker): inpt = mocker.MagicMock(spec=inpt_type) transform = transforms.ToImageTensor() transform(inpt) - if inpt_type in (datapoints.BoundingBoxes, datapoints.Image, str, int): + if inpt_type in (datapoints.BBoxes, datapoints.Image, str, int): assert fn.call_count == 0 else: fn.assert_called_once_with(inpt) @@ -980,7 +972,7 @@ def test__transform(self, inpt_type, mocker): class TestToImagePIL: @pytest.mark.parametrize( "inpt_type", - [torch.Tensor, PIL.Image.Image, datapoints.Image, np.ndarray, datapoints.BoundingBoxes, str, int], + [torch.Tensor, PIL.Image.Image, datapoints.Image, np.ndarray, datapoints.BBoxes, str, int], ) def test__transform(self, inpt_type, mocker): fn = mocker.patch("torchvision.transforms.v2.functional.to_image_pil") @@ -988,7 +980,7 @@ def test__transform(self, inpt_type, mocker): inpt = mocker.MagicMock(spec=inpt_type) transform = transforms.ToImagePIL() transform(inpt) - if inpt_type in (datapoints.BoundingBoxes, PIL.Image.Image, str, int): + if inpt_type in (datapoints.BBoxes, PIL.Image.Image, str, int): assert fn.call_count == 0 else: fn.assert_called_once_with(inpt, mode=transform.mode) @@ -997,7 +989,7 @@ def test__transform(self, inpt_type, mocker): class TestToPILImage: @pytest.mark.parametrize( "inpt_type", - [torch.Tensor, PIL.Image.Image, datapoints.Image, np.ndarray, datapoints.BoundingBoxes, str, int], + [torch.Tensor, PIL.Image.Image, datapoints.Image, np.ndarray, datapoints.BBoxes, str, int], ) def test__transform(self, inpt_type, mocker): fn = mocker.patch("torchvision.transforms.v2.functional.to_image_pil") @@ -1005,7 +997,7 @@ def test__transform(self, inpt_type, mocker): inpt = mocker.MagicMock(spec=inpt_type) transform = transforms.ToPILImage() transform(inpt) - if inpt_type in (PIL.Image.Image, datapoints.BoundingBoxes, str, int): + if inpt_type in (PIL.Image.Image, datapoints.BBoxes, str, int): assert fn.call_count == 0 else: fn.assert_called_once_with(inpt, mode=transform.mode) @@ -1014,7 +1006,7 @@ def test__transform(self, inpt_type, mocker): class TestToTensor: @pytest.mark.parametrize( "inpt_type", - 
[torch.Tensor, PIL.Image.Image, datapoints.Image, np.ndarray, datapoints.BoundingBoxes, str, int], + [torch.Tensor, PIL.Image.Image, datapoints.Image, np.ndarray, datapoints.BBoxes, str, int], ) def test__transform(self, inpt_type, mocker): fn = mocker.patch("torchvision.transforms.functional.to_tensor") @@ -1023,7 +1015,7 @@ def test__transform(self, inpt_type, mocker): with pytest.warns(UserWarning, match="deprecated and will be removed"): transform = transforms.ToTensor() transform(inpt) - if inpt_type in (datapoints.Image, torch.Tensor, datapoints.BoundingBoxes, str, int): + if inpt_type in (datapoints.Image, torch.Tensor, datapoints.BBoxes, str, int): assert fn.call_count == 0 else: fn.assert_called_once_with(inpt) @@ -1065,7 +1057,7 @@ def test__get_params(self, device, options, mocker): image = mocker.MagicMock(spec=datapoints.Image) image.num_channels = 3 image.spatial_size = (24, 32) - bboxes = datapoints.BoundingBoxes( + bboxes = datapoints.BBoxes( torch.tensor([[1, 1, 10, 10], [20, 20, 23, 23], [1, 20, 10, 23], [20, 1, 23, 10]]), format="XYXY", spatial_size=image.spatial_size, @@ -1103,7 +1095,7 @@ def test__get_params(self, device, options, mocker): def test__transform_empty_params(self, mocker): transform = transforms.RandomIoUCrop(sampler_options=[2.0]) image = datapoints.Image(torch.rand(1, 3, 4, 4)) - bboxes = datapoints.BoundingBoxes(torch.tensor([[1, 1, 2, 2]]), format="XYXY", spatial_size=(4, 4)) + bboxes = datapoints.BBoxes(torch.tensor([[1, 1, 2, 2]]), format="XYXY", spatial_size=(4, 4)) label = torch.tensor([1]) sample = [image, bboxes, label] # Let's mock transform._get_params to control the output: @@ -1123,7 +1115,7 @@ def test__transform(self, mocker): transform = transforms.RandomIoUCrop() image = datapoints.Image(torch.rand(3, 32, 24)) - bboxes = make_bounding_box(format="XYXY", spatial_size=(32, 24), batch_dims=(6,)) + bboxes = make_bbox(format="XYXY", spatial_size=(32, 24), batch_dims=(6,)) masks = make_detection_mask((32, 24), num_objects=6) sample = [image, bboxes, masks] @@ -1147,7 +1139,7 @@ def test__transform(self, mocker): # check number of bboxes vs number of labels: output_bboxes = output[1] - assert isinstance(output_bboxes, datapoints.BoundingBoxes) + assert isinstance(output_bboxes, datapoints.BBoxes) assert (output_bboxes[~is_within_crop_area] == 0).all() output_masks = output[2] @@ -1505,7 +1497,7 @@ def test_detection_preset(image_type, data_augmentation, to_tensor, sanitize): transforms.ConvertImageDtype(torch.float), ] if sanitize: - t += [transforms.SanitizeBoundingBoxes()] + t += [transforms.SanitizeBBoxes()] t = transforms.Compose(t) num_boxes = 5 @@ -1523,7 +1515,7 @@ def test_detection_preset(image_type, data_augmentation, to_tensor, sanitize): boxes = torch.randint(0, min(H, W) // 2, size=(num_boxes, 4)) boxes[:, 2:] += boxes[:, :2] boxes = boxes.clamp(min=0, max=min(H, W)) - boxes = datapoints.BoundingBoxes(boxes, format="XYXY", spatial_size=(H, W)) + boxes = datapoints.BBoxes(boxes, format="XYXY", spatial_size=(H, W)) masks = datapoints.Mask(torch.randint(0, 2, size=(num_boxes, H, W), dtype=torch.uint8)) @@ -1546,7 +1538,7 @@ def test_detection_preset(image_type, data_augmentation, to_tensor, sanitize): # ssd and ssdlite contain RandomIoUCrop which may "remove" some bbox. 
It # doesn't remove them strictly speaking, it just marks some boxes as # degenerate and those boxes will be later removed by - # SanitizeBoundingBoxes(), which we add to the pipelines if the sanitize + # SanitizeBBoxes(), which we add to the pipelines if the sanitize # param is True. # Note that the values below are probably specific to the random seed # set above (which is fine). @@ -1560,7 +1552,7 @@ def test_detection_preset(image_type, data_augmentation, to_tensor, sanitize): @pytest.mark.parametrize("min_size", (1, 10)) @pytest.mark.parametrize("labels_getter", ("default", lambda inputs: inputs["labels"], None, lambda inputs: None)) @pytest.mark.parametrize("sample_type", (tuple, dict)) -def test_sanitize_bounding_boxes(min_size, labels_getter, sample_type): +def test_sanitize_bboxes(min_size, labels_getter, sample_type): if sample_type is tuple and not isinstance(labels_getter, str): # The "lambda inputs: inputs["labels"]" labels_getter used in this test @@ -1594,9 +1586,9 @@ def test_sanitize_bounding_boxes(min_size, labels_getter, sample_type): boxes = torch.tensor(boxes) labels = torch.arange(boxes.shape[0]) - boxes = datapoints.BoundingBoxes( + boxes = datapoints.BBoxes( boxes, - format=datapoints.BoundingBoxFormat.XYXY, + format=datapoints.BBoxFormat.XYXY, spatial_size=(H, W), ) @@ -1616,7 +1608,7 @@ def test_sanitize_bounding_boxes(min_size, labels_getter, sample_type): img = sample.pop("image") sample = (img, sample) - out = transforms.SanitizeBoundingBoxes(min_size=min_size, labels_getter=labels_getter)(sample) + out = transforms.SanitizeBBoxes(min_size=min_size, labels_getter=labels_getter)(sample) if sample_type is tuple: out_image = out[0] @@ -1634,7 +1626,7 @@ def test_sanitize_bounding_boxes(min_size, labels_getter, sample_type): assert out_image is input_img assert out_whatever is whatever - assert isinstance(out_boxes, datapoints.BoundingBoxes) + assert isinstance(out_boxes, datapoints.BBoxes) assert isinstance(out_masks, datapoints.Mask) if labels_getter is None or (callable(labels_getter) and labels_getter({"labels": "blah"}) is None): @@ -1646,42 +1638,42 @@ def test_sanitize_bounding_boxes(min_size, labels_getter, sample_type): assert out_labels.tolist() == valid_indices -def test_sanitize_bounding_boxes_errors(): +def test_sanitize_bboxes_errors(): - good_bbox = datapoints.BoundingBoxes( + good_bbox = datapoints.BBoxes( [[0, 0, 10, 10]], - format=datapoints.BoundingBoxFormat.XYXY, + format=datapoints.BBoxFormat.XYXY, spatial_size=(20, 20), ) with pytest.raises(ValueError, match="min_size must be >= 1"): - transforms.SanitizeBoundingBoxes(min_size=0) + transforms.SanitizeBBoxes(min_size=0) with pytest.raises(ValueError, match="labels_getter should either be 'default'"): - transforms.SanitizeBoundingBoxes(labels_getter=12) + transforms.SanitizeBBoxes(labels_getter=12) with pytest.raises(ValueError, match="Could not infer where the labels are"): bad_labels_key = {"bbox": good_bbox, "BAD_KEY": torch.arange(good_bbox.shape[0])} - transforms.SanitizeBoundingBoxes()(bad_labels_key) + transforms.SanitizeBBoxes()(bad_labels_key) with pytest.raises(ValueError, match="must be a tensor"): not_a_tensor = {"bbox": good_bbox, "labels": torch.arange(good_bbox.shape[0]).tolist()} - transforms.SanitizeBoundingBoxes()(not_a_tensor) + transforms.SanitizeBBoxes()(not_a_tensor) with pytest.raises(ValueError, match="Number of boxes"): different_sizes = {"bbox": good_bbox, "labels": torch.arange(good_bbox.shape[0] + 3)} - transforms.SanitizeBoundingBoxes()(different_sizes) + 
transforms.SanitizeBBoxes()(different_sizes) with pytest.raises(ValueError, match="boxes must be of shape"): - bad_bbox = datapoints.BoundingBoxes( # batch with 2 elements + bad_bbox = datapoints.BBoxes( # batch with 2 elements [ [[0, 0, 10, 10]], [[0, 0, 10, 10]], ], - format=datapoints.BoundingBoxFormat.XYXY, + format=datapoints.BBoxFormat.XYXY, spatial_size=(20, 20), ) different_sizes = {"bbox": bad_bbox, "labels": torch.arange(bad_bbox.shape[0])} - transforms.SanitizeBoundingBoxes()(different_sizes) + transforms.SanitizeBBoxes()(different_sizes) @pytest.mark.parametrize( diff --git a/test/test_transforms_v2_consistency.py b/test/test_transforms_v2_consistency.py index 9adec66b3c4..781c3cd864d 100644 --- a/test/test_transforms_v2_consistency.py +++ b/test/test_transforms_v2_consistency.py @@ -17,7 +17,7 @@ ArgsKwargs, assert_close, assert_equal, - make_bounding_box, + make_bbox, make_detection_mask, make_image, make_images, @@ -1090,7 +1090,7 @@ def make_label(extra_dims, categories): pil_image = to_image_pil(make_image(size=size, color_space="RGB")) target = { - "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), + "boxes": make_bbox(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), } if with_mask: @@ -1100,7 +1100,7 @@ def make_label(extra_dims, categories): tensor_image = torch.Tensor(make_image(size=size, color_space="RGB", dtype=torch.float32)) target = { - "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), + "boxes": make_bbox(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), } if with_mask: @@ -1110,7 +1110,7 @@ def make_label(extra_dims, categories): datapoint_image = make_image(size=size, color_space="RGB", dtype=torch.float32) target = { - "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), + "boxes": make_bbox(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), } if with_mask: @@ -1127,7 +1127,7 @@ def make_label(extra_dims, categories): v2_transforms.Compose( [ v2_transforms.RandomIoUCrop(), - v2_transforms.SanitizeBoundingBoxes(labels_getter=lambda sample: sample[1]["labels"]), + v2_transforms.SanitizeBBoxes(labels_getter=lambda sample: sample[1]["labels"]), ] ), {"with_mask": False}, diff --git a/test/test_transforms_v2_functional.py b/test/test_transforms_v2_functional.py index 5d692b58108..c46b34f4755 100644 --- a/test/test_transforms_v2_functional.py +++ b/test/test_transforms_v2_functional.py @@ -16,7 +16,7 @@ cache, cpu_and_cuda, DEFAULT_SQUARE_SPATIAL_SIZE, - make_bounding_boxes, + make_bboxes, needs_cuda, parametrized_error_message, set_rng_seed, @@ -26,7 +26,7 @@ from torchvision.transforms.functional import _get_perspective_coeffs from torchvision.transforms.v2 import functional as F from torchvision.transforms.v2.functional._geometry import _center_crop_compute_padding -from torchvision.transforms.v2.functional._meta import clamp_bounding_boxes, convert_format_bounding_boxes +from torchvision.transforms.v2.functional._meta import clamp_bboxes, convert_format_bboxes from torchvision.transforms.v2.utils import is_simple_tensor from transforms_v2_dispatcher_infos import DISPATCHER_INFOS from transforms_v2_kernel_infos 
import KERNEL_INFOS @@ -176,7 +176,7 @@ def test_batched_vs_single(self, test_id, info, args_kwargs, device): # Everything to the left is considered a batch dimension. data_dims = { datapoints.Image: 3, - datapoints.BoundingBoxes: 1, + datapoints.BBoxes: 1, # `Mask`'s are special in the sense that the data dimensions depend on the type of mask. For detection masks # it is 3 `(*, N, H, W)`, but for segmentation masks it is 2 `(*, H, W)`. Since both a grouped under one # type all kernels should also work without differentiating between the two. Thus, we go with 2 here as @@ -515,15 +515,15 @@ def test_unkown_type(self, info): [ info for info in DISPATCHER_INFOS - if datapoints.BoundingBoxes in info.kernels and info.dispatcher is not F.convert_format_bounding_boxes + if datapoints.BBoxes in info.kernels and info.dispatcher is not F.convert_format_bboxes ], - args_kwargs_fn=lambda info: info.sample_inputs(datapoints.BoundingBoxes), + args_kwargs_fn=lambda info: info.sample_inputs(datapoints.BBoxes), ) - def test_bounding_boxes_format_consistency(self, info, args_kwargs): - (bounding_boxes, *other_args), kwargs = args_kwargs.load() - format = bounding_boxes.format + def test_bboxes_format_consistency(self, info, args_kwargs): + (bboxes, *other_args), kwargs = args_kwargs.load() + format = bboxes.format - output = info.dispatcher(bounding_boxes, *other_args, **kwargs) + output = info.dispatcher(bboxes, *other_args, **kwargs) assert output.format == format @@ -562,61 +562,59 @@ def assert_samples_from_standard_normal(t): assert_samples_from_standard_normal(F.normalize_image_tensor(image, mean, std)) -class TestClampBoundingBoxes: +class TestClampBBoxes: @pytest.mark.parametrize( "metadata", [ dict(), - dict(format=datapoints.BoundingBoxFormat.XYXY), + dict(format=datapoints.BBoxFormat.XYXY), dict(spatial_size=(1, 1)), ], ) def test_simple_tensor_insufficient_metadata(self, metadata): - simple_tensor = next(make_bounding_boxes()).as_subclass(torch.Tensor) + simple_tensor = next(make_bboxes()).as_subclass(torch.Tensor) with pytest.raises(ValueError, match=re.escape("`format` and `spatial_size` has to be passed")): - F.clamp_bounding_boxes(simple_tensor, **metadata) + F.clamp_bboxes(simple_tensor, **metadata) @pytest.mark.parametrize( "metadata", [ - dict(format=datapoints.BoundingBoxFormat.XYXY), + dict(format=datapoints.BBoxFormat.XYXY), dict(spatial_size=(1, 1)), - dict(format=datapoints.BoundingBoxFormat.XYXY, spatial_size=(1, 1)), + dict(format=datapoints.BBoxFormat.XYXY, spatial_size=(1, 1)), ], ) def test_datapoint_explicit_metadata(self, metadata): - datapoint = next(make_bounding_boxes()) + datapoint = next(make_bboxes()) with pytest.raises(ValueError, match=re.escape("`format` and `spatial_size` must not be passed")): - F.clamp_bounding_boxes(datapoint, **metadata) + F.clamp_bboxes(datapoint, **metadata) -class TestConvertFormatBoundingBoxes: +class TestConvertFormatBBoxes: @pytest.mark.parametrize( ("inpt", "old_format"), [ - (next(make_bounding_boxes()), None), - (next(make_bounding_boxes()).as_subclass(torch.Tensor), datapoints.BoundingBoxFormat.XYXY), + (next(make_bboxes()), None), + (next(make_bboxes()).as_subclass(torch.Tensor), datapoints.BBoxFormat.XYXY), ], ) def test_missing_new_format(self, inpt, old_format): with pytest.raises(TypeError, match=re.escape("missing 1 required argument: 'new_format'")): - F.convert_format_bounding_boxes(inpt, old_format) + F.convert_format_bboxes(inpt, old_format) def test_simple_tensor_insufficient_metadata(self): - simple_tensor = 
next(make_bounding_boxes()).as_subclass(torch.Tensor) + simple_tensor = next(make_bboxes()).as_subclass(torch.Tensor) with pytest.raises(ValueError, match=re.escape("`old_format` has to be passed")): - F.convert_format_bounding_boxes(simple_tensor, new_format=datapoints.BoundingBoxFormat.CXCYWH) + F.convert_format_bboxes(simple_tensor, new_format=datapoints.BBoxFormat.CXCYWH) def test_datapoint_explicit_metadata(self): - datapoint = next(make_bounding_boxes()) + datapoint = next(make_bboxes()) with pytest.raises(ValueError, match=re.escape("`old_format` must not be passed")): - F.convert_format_bounding_boxes( - datapoint, old_format=datapoint.format, new_format=datapoints.BoundingBoxFormat.CXCYWH - ) + F.convert_format_bboxes(datapoint, old_format=datapoint.format, new_format=datapoints.BBoxFormat.CXCYWH) # TODO: All correctness checks below this line should be ported to be references on a `KernelInfo` in @@ -649,7 +647,7 @@ def _compute_affine_matrix(angle_, translate_, scale_, shear_, center_): @pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize( "format", - [datapoints.BoundingBoxFormat.XYXY, datapoints.BoundingBoxFormat.XYWH, datapoints.BoundingBoxFormat.CXCYWH], + [datapoints.BBoxFormat.XYXY, datapoints.BBoxFormat.XYWH, datapoints.BBoxFormat.CXCYWH], ) @pytest.mark.parametrize( "top, left, height, width, expected_bboxes", @@ -658,7 +656,7 @@ def _compute_affine_matrix(angle_, translate_, scale_, shear_, center_): [-8, 12, 70, 40, [(-2.0, 23.0, 13.0, 43.0), (38.0, 13.0, 58.0, 30.0), (33.0, 54.0, 44.0, 70.0)]], ], ) -def test_correctness_crop_bounding_boxes(device, format, top, left, height, width, expected_bboxes): +def test_correctness_crop_bboxes(device, format, top, left, height, width, expected_bboxes): # Expected bboxes computed using Albumentations: # import numpy as np @@ -672,7 +670,7 @@ def test_correctness_crop_bounding_boxes(device, format, top, left, height, widt # out_box = denormalize_bbox(n_out_box, height, width) # expected_bboxes.append(out_box) - format = datapoints.BoundingBoxFormat.XYXY + format = datapoints.BBoxFormat.XYXY spatial_size = (64, 76) in_boxes = [ [10.0, 15.0, 25.0, 35.0], @@ -680,14 +678,14 @@ def test_correctness_crop_bounding_boxes(device, format, top, left, height, widt [45.0, 46.0, 56.0, 62.0], ] in_boxes = torch.tensor(in_boxes, device=device) - if format != datapoints.BoundingBoxFormat.XYXY: - in_boxes = convert_format_bounding_boxes(in_boxes, datapoints.BoundingBoxFormat.XYXY, format) + if format != datapoints.BBoxFormat.XYXY: + in_boxes = convert_format_bboxes(in_boxes, datapoints.BBoxFormat.XYXY, format) - expected_bboxes = clamp_bounding_boxes( - datapoints.BoundingBoxes(expected_bboxes, format="XYXY", spatial_size=spatial_size) + expected_bboxes = clamp_bboxes( + datapoints.BBoxes(expected_bboxes, format="XYXY", spatial_size=spatial_size) ).tolist() - output_boxes, output_spatial_size = F.crop_bounding_boxes( + output_boxes, output_spatial_size = F.crop_bboxes( in_boxes, format, top, @@ -696,8 +694,8 @@ def test_correctness_crop_bounding_boxes(device, format, top, left, height, widt spatial_size[1], ) - if format != datapoints.BoundingBoxFormat.XYXY: - output_boxes = convert_format_bounding_boxes(output_boxes, format, datapoints.BoundingBoxFormat.XYXY) + if format != datapoints.BBoxFormat.XYXY: + output_boxes = convert_format_bboxes(output_boxes, format, datapoints.BBoxFormat.XYXY) torch.testing.assert_close(output_boxes.tolist(), expected_bboxes) torch.testing.assert_close(output_spatial_size, spatial_size) @@ 
-718,7 +716,7 @@ def test_correctness_vertical_flip_segmentation_mask_on_fixed_input(device): @pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize( "format", - [datapoints.BoundingBoxFormat.XYXY, datapoints.BoundingBoxFormat.XYWH, datapoints.BoundingBoxFormat.CXCYWH], + [datapoints.BBoxFormat.XYXY, datapoints.BBoxFormat.XYWH, datapoints.BBoxFormat.CXCYWH], ) @pytest.mark.parametrize( "top, left, height, width, size", @@ -727,7 +725,7 @@ def test_correctness_vertical_flip_segmentation_mask_on_fixed_input(device): [-5, 5, 35, 45, (32, 34)], ], ) -def test_correctness_resized_crop_bounding_boxes(device, format, top, left, height, width, size): +def test_correctness_resized_crop_bboxes(device, format, top, left, height, width, size): def _compute_expected_bbox(bbox, top_, left_, height_, width_, size_): # bbox should be xyxy bbox[0] = (bbox[0] - left_) * size_[1] / width_ @@ -736,7 +734,7 @@ def _compute_expected_bbox(bbox, top_, left_, height_, width_, size_): bbox[3] = (bbox[3] - top_) * size_[0] / height_ return bbox - format = datapoints.BoundingBoxFormat.XYXY + format = datapoints.BBoxFormat.XYXY spatial_size = (100, 100) in_boxes = [ [10.0, 10.0, 20.0, 20.0], @@ -747,16 +745,14 @@ def _compute_expected_bbox(bbox, top_, left_, height_, width_, size_): expected_bboxes.append(_compute_expected_bbox(list(in_box), top, left, height, width, size)) expected_bboxes = torch.tensor(expected_bboxes, device=device) - in_boxes = datapoints.BoundingBoxes( - in_boxes, format=datapoints.BoundingBoxFormat.XYXY, spatial_size=spatial_size, device=device - ) - if format != datapoints.BoundingBoxFormat.XYXY: - in_boxes = convert_format_bounding_boxes(in_boxes, datapoints.BoundingBoxFormat.XYXY, format) + in_boxes = datapoints.BBoxes(in_boxes, format=datapoints.BBoxFormat.XYXY, spatial_size=spatial_size, device=device) + if format != datapoints.BBoxFormat.XYXY: + in_boxes = convert_format_bboxes(in_boxes, datapoints.BBoxFormat.XYXY, format) - output_boxes, output_spatial_size = F.resized_crop_bounding_boxes(in_boxes, format, top, left, height, width, size) + output_boxes, output_spatial_size = F.resized_crop_bboxes(in_boxes, format, top, left, height, width, size) - if format != datapoints.BoundingBoxFormat.XYXY: - output_boxes = convert_format_bounding_boxes(output_boxes, format, datapoints.BoundingBoxFormat.XYXY) + if format != datapoints.BBoxFormat.XYXY: + output_boxes = convert_format_bboxes(output_boxes, format, datapoints.BBoxFormat.XYXY) torch.testing.assert_close(output_boxes, expected_bboxes) torch.testing.assert_close(output_spatial_size, size) @@ -776,7 +772,7 @@ def _parse_padding(padding): @pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("padding", [[1], [1, 1], [1, 1, 2, 2]]) -def test_correctness_pad_bounding_boxes(device, padding): +def test_correctness_pad_bboxes(device, padding): def _compute_expected_bbox(bbox, padding_): pad_left, pad_up, _, _ = _parse_padding(padding_) @@ -784,14 +780,14 @@ def _compute_expected_bbox(bbox, padding_): format = bbox.format bbox = ( bbox.clone() - if format == datapoints.BoundingBoxFormat.XYXY - else convert_format_bounding_boxes(bbox, new_format=datapoints.BoundingBoxFormat.XYXY) + if format == datapoints.BBoxFormat.XYXY + else convert_format_bboxes(bbox, new_format=datapoints.BBoxFormat.XYXY) ) bbox[0::2] += pad_left bbox[1::2] += pad_up - bbox = convert_format_bounding_boxes(bbox, new_format=format) + bbox = convert_format_bboxes(bbox, new_format=format) if bbox.dtype != dtype: # Temporary cast to original 
dtype # e.g. float32 -> int @@ -803,12 +799,12 @@ def _compute_expected_spatial_size(bbox, padding_): height, width = bbox.spatial_size return height + pad_up + pad_down, width + pad_left + pad_right - for bboxes in make_bounding_boxes(): + for bboxes in make_bboxes(): bboxes = bboxes.to(device) bboxes_format = bboxes.format bboxes_spatial_size = bboxes.spatial_size - output_boxes, output_spatial_size = F.pad_bounding_boxes( + output_boxes, output_spatial_size = F.pad_bboxes( bboxes, format=bboxes_format, spatial_size=bboxes_spatial_size, padding=padding ) @@ -819,7 +815,7 @@ def _compute_expected_spatial_size(bbox, padding_): expected_bboxes = [] for bbox in bboxes: - bbox = datapoints.BoundingBoxes(bbox, format=bboxes_format, spatial_size=bboxes_spatial_size) + bbox = datapoints.BBoxes(bbox, format=bboxes_format, spatial_size=bboxes_spatial_size) expected_bboxes.append(_compute_expected_bbox(bbox, padding)) if len(expected_bboxes) > 1: @@ -849,7 +845,7 @@ def test_correctness_pad_segmentation_mask_on_fixed_input(device): [[[3, 2], [32, 3], [30, 24], [2, 25]], [[5, 5], [30, 3], [33, 19], [4, 25]]], ], ) -def test_correctness_perspective_bounding_boxes(device, startpoints, endpoints): +def test_correctness_perspective_bboxes(device, startpoints, endpoints): def _compute_expected_bbox(bbox, pcoeffs_): m1 = np.array( [ @@ -864,7 +860,7 @@ def _compute_expected_bbox(bbox, pcoeffs_): ] ) - bbox_xyxy = convert_format_bounding_boxes(bbox, new_format=datapoints.BoundingBoxFormat.XYXY) + bbox_xyxy = convert_format_bboxes(bbox, new_format=datapoints.BBoxFormat.XYXY) points = np.array( [ [bbox_xyxy[0].item(), bbox_xyxy[1].item(), 1.0], @@ -884,24 +880,24 @@ def _compute_expected_bbox(bbox, pcoeffs_): np.max(transformed_points[:, 1]), ] ) - out_bbox = datapoints.BoundingBoxes( + out_bbox = datapoints.BBoxes( out_bbox, - format=datapoints.BoundingBoxFormat.XYXY, + format=datapoints.BBoxFormat.XYXY, spatial_size=bbox.spatial_size, dtype=bbox.dtype, device=bbox.device, ) - return clamp_bounding_boxes(convert_format_bounding_boxes(out_bbox, new_format=bbox.format)) + return clamp_bboxes(convert_format_bboxes(out_bbox, new_format=bbox.format)) spatial_size = (32, 38) pcoeffs = _get_perspective_coeffs(startpoints, endpoints) inv_pcoeffs = _get_perspective_coeffs(endpoints, startpoints) - for bboxes in make_bounding_boxes(spatial_size=spatial_size, extra_dims=((4,),)): + for bboxes in make_bboxes(spatial_size=spatial_size, extra_dims=((4,),)): bboxes = bboxes.to(device) - output_bboxes = F.perspective_bounding_boxes( + output_bboxes = F.perspective_bboxes( bboxes.as_subclass(torch.Tensor), format=bboxes.format, spatial_size=bboxes.spatial_size, @@ -915,7 +911,7 @@ def _compute_expected_bbox(bbox, pcoeffs_): expected_bboxes = [] for bbox in bboxes: - bbox = datapoints.BoundingBoxes(bbox, format=bboxes.format, spatial_size=bboxes.spatial_size) + bbox = datapoints.BBoxes(bbox, format=bboxes.format, spatial_size=bboxes.spatial_size) expected_bboxes.append(_compute_expected_bbox(bbox, inv_pcoeffs)) if len(expected_bboxes) > 1: expected_bboxes = torch.stack(expected_bboxes) @@ -929,12 +925,12 @@ def _compute_expected_bbox(bbox, pcoeffs_): "output_size", [(18, 18), [18, 15], (16, 19), [12], [46, 48]], ) -def test_correctness_center_crop_bounding_boxes(device, output_size): +def test_correctness_center_crop_bboxes(device, output_size): def _compute_expected_bbox(bbox, output_size_): format_ = bbox.format spatial_size_ = bbox.spatial_size dtype = bbox.dtype - bbox = convert_format_bounding_boxes(bbox.float(), 
format_, datapoints.BoundingBoxFormat.XYWH) + bbox = convert_format_bboxes(bbox.float(), format_, datapoints.BBoxFormat.XYWH) if len(output_size_) == 1: output_size_.append(output_size_[-1]) @@ -948,16 +944,16 @@ def _compute_expected_bbox(bbox, output_size_): bbox[3].item(), ] out_bbox = torch.tensor(out_bbox) - out_bbox = convert_format_bounding_boxes(out_bbox, datapoints.BoundingBoxFormat.XYWH, format_) - out_bbox = clamp_bounding_boxes(out_bbox, format=format_, spatial_size=output_size) + out_bbox = convert_format_bboxes(out_bbox, datapoints.BBoxFormat.XYWH, format_) + out_bbox = clamp_bboxes(out_bbox, format=format_, spatial_size=output_size) return out_bbox.to(dtype=dtype, device=bbox.device) - for bboxes in make_bounding_boxes(extra_dims=((4,),)): + for bboxes in make_bboxes(extra_dims=((4,),)): bboxes = bboxes.to(device) bboxes_format = bboxes.format bboxes_spatial_size = bboxes.spatial_size - output_boxes, output_spatial_size = F.center_crop_bounding_boxes( + output_boxes, output_spatial_size = F.center_crop_bboxes( bboxes, bboxes_format, bboxes_spatial_size, output_size ) @@ -966,7 +962,7 @@ def _compute_expected_bbox(bbox, output_size_): expected_bboxes = [] for bbox in bboxes: - bbox = datapoints.BoundingBoxes(bbox, format=bboxes_format, spatial_size=bboxes_spatial_size) + bbox = datapoints.BBoxes(bbox, format=bboxes_format, spatial_size=bboxes_spatial_size) expected_bboxes.append(_compute_expected_bbox(bbox, output_size)) if len(expected_bboxes) > 1: diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py index 3b808d6b73c..96dd7980138 100644 --- a/test/test_transforms_v2_refactored.py +++ b/test/test_transforms_v2_refactored.py @@ -19,7 +19,7 @@ cpu_and_cuda, freeze_rng_state, ignore_jit_no_profile_information_warning, - make_bounding_box, + make_bbox, make_detection_mask, make_image, make_image_pil, @@ -196,7 +196,7 @@ def _check_dispatcher_dispatch(dispatcher, kernel, input, *args, **kwargs): assert isinstance(output, type(input)) - if isinstance(input, datapoints.BoundingBoxes): + if isinstance(input, datapoints.BBoxes): assert output.format == input.format @@ -306,7 +306,7 @@ def check_transform(transform_cls, input, *args, **kwargs): output = transform(input) assert isinstance(output, type(input)) - if isinstance(input, datapoints.BoundingBoxes): + if isinstance(input, datapoints.BBoxes): assert output.format == input.format _check_transform_v1_compatibility(transform, input) @@ -392,16 +392,16 @@ def assert_warns_antialias_default_value(): yield -def reference_affine_bounding_boxes_helper(bounding_boxes, *, format, spatial_size, affine_matrix): +def reference_affine_bboxes_helper(bboxes, *, format, spatial_size, affine_matrix): def transform(bbox): # Go to float before converting to prevent precision loss in case of CXCYWH -> XYXY and W or H is 1 in_dtype = bbox.dtype if not torch.is_floating_point(bbox): bbox = bbox.float() - bbox_xyxy = F.convert_format_bounding_boxes( + bbox_xyxy = F.convert_format_bboxes( bbox.as_subclass(torch.Tensor), old_format=format, - new_format=datapoints.BoundingBoxFormat.XYXY, + new_format=datapoints.BBoxFormat.XYXY, inplace=True, ) points = np.array( @@ -422,15 +422,15 @@ def transform(bbox): ], dtype=bbox_xyxy.dtype, ) - out_bbox = F.convert_format_bounding_boxes( - out_bbox, old_format=datapoints.BoundingBoxFormat.XYXY, new_format=format, inplace=True + out_bbox = F.convert_format_bboxes( + out_bbox, old_format=datapoints.BBoxFormat.XYXY, new_format=format, inplace=True ) # It is important to clamp 
before casting, especially for CXCYWH format, dtype=int64 - out_bbox = F.clamp_bounding_boxes(out_bbox, format=format, spatial_size=spatial_size) + out_bbox = F.clamp_bboxes(out_bbox, format=format, spatial_size=spatial_size) out_bbox = out_bbox.to(dtype=in_dtype) return out_bbox - return torch.stack([transform(b) for b in bounding_boxes.reshape(-1, 4).unbind()]).reshape(bounding_boxes.shape) + return torch.stack([transform(b) for b in bboxes.reshape(-1, 4).unbind()]).reshape(bboxes.shape) class TestResize: @@ -503,25 +503,25 @@ def test_kernel_image_tensor(self, size, interpolation, use_max_size, antialias, check_scripted_vs_eager=not isinstance(size, int), ) - @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) + @pytest.mark.parametrize("format", list(datapoints.BBoxFormat)) @pytest.mark.parametrize("size", OUTPUT_SIZES) @pytest.mark.parametrize("use_max_size", [True, False]) @pytest.mark.parametrize("dtype", [torch.float32, torch.int64]) @pytest.mark.parametrize("device", cpu_and_cuda()) - def test_kernel_bounding_boxes(self, format, size, use_max_size, dtype, device): + def test_kernel_bboxes(self, format, size, use_max_size, dtype, device): if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)): return - bounding_boxes = make_bounding_box( + bboxes = make_bbox( format=format, spatial_size=self.INPUT_SIZE, dtype=dtype, device=device, ) check_kernel( - F.resize_bounding_boxes, - bounding_boxes, - spatial_size=bounding_boxes.spatial_size, + F.resize_bboxes, + bboxes, + spatial_size=bboxes.spatial_size, size=size, **max_size_kwarg, check_scripted_vs_eager=not isinstance(size, int), @@ -541,7 +541,7 @@ def test_kernel_video(self): (F.resize_image_tensor, make_image_tensor), (F.resize_image_pil, make_image_pil), (F.resize_image_tensor, make_image), - (F.resize_bounding_boxes, make_bounding_box), + (F.resize_bboxes, make_bbox), (F.resize_mask, make_segmentation_mask), (F.resize_video, make_video), ], @@ -562,7 +562,7 @@ def test_dispatcher(self, size, kernel, make_input): (F.resize_image_tensor, torch.Tensor), (F.resize_image_pil, PIL.Image.Image), (F.resize_image_tensor, datapoints.Image), - (F.resize_bounding_boxes, datapoints.BoundingBoxes), + (F.resize_bboxes, datapoints.BBoxes), (F.resize_mask, datapoints.Mask), (F.resize_video, datapoints.Video), ], @@ -578,7 +578,7 @@ def test_dispatcher_signature(self, kernel, input_type): make_image_tensor, make_image_pil, make_image, - make_bounding_box, + make_bbox, make_segmentation_mask, make_detection_mask, make_video, @@ -612,45 +612,43 @@ def test_image_correctness(self, size, interpolation, use_max_size, fn): self._check_output_size(image, actual, size=size, **max_size_kwarg) torch.testing.assert_close(actual, expected, atol=1, rtol=0) - def _reference_resize_bounding_boxes(self, bounding_boxes, *, size, max_size=None): - old_height, old_width = bounding_boxes.spatial_size - new_height, new_width = self._compute_output_size( - input_size=bounding_boxes.spatial_size, size=size, max_size=max_size - ) + def _reference_resize_bboxes(self, bboxes, *, size, max_size=None): + old_height, old_width = bboxes.spatial_size + new_height, new_width = self._compute_output_size(input_size=bboxes.spatial_size, size=size, max_size=max_size) if (old_height, old_width) == (new_height, new_width): - return bounding_boxes + return bboxes affine_matrix = np.array( [ [new_width / old_width, 0, 0], [0, new_height / old_height, 0], ], - dtype="float64" if bounding_boxes.dtype == torch.float64 else "float32", + 
dtype="float64" if bboxes.dtype == torch.float64 else "float32", ) - expected_bboxes = reference_affine_bounding_boxes_helper( - bounding_boxes, - format=bounding_boxes.format, + expected_bboxes = reference_affine_bboxes_helper( + bboxes, + format=bboxes.format, spatial_size=(new_height, new_width), affine_matrix=affine_matrix, ) - return datapoints.BoundingBoxes.wrap_like(bounding_boxes, expected_bboxes, spatial_size=(new_height, new_width)) + return datapoints.BBoxes.wrap_like(bboxes, expected_bboxes, spatial_size=(new_height, new_width)) - @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) + @pytest.mark.parametrize("format", list(datapoints.BBoxFormat)) @pytest.mark.parametrize("size", OUTPUT_SIZES) @pytest.mark.parametrize("use_max_size", [True, False]) @pytest.mark.parametrize("fn", [F.resize, transform_cls_to_functional(transforms.Resize)]) - def test_bounding_boxes_correctness(self, format, size, use_max_size, fn): + def test_bboxes_correctness(self, format, size, use_max_size, fn): if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)): return - bounding_boxes = make_bounding_box(format=format, spatial_size=self.INPUT_SIZE) + bboxes = make_bbox(format=format, spatial_size=self.INPUT_SIZE) - actual = fn(bounding_boxes, size=size, **max_size_kwarg) - expected = self._reference_resize_bounding_boxes(bounding_boxes, size=size, **max_size_kwarg) + actual = fn(bboxes, size=size, **max_size_kwarg) + expected = self._reference_resize_bboxes(bboxes, size=size, **max_size_kwarg) - self._check_output_size(bounding_boxes, actual, size=size, **max_size_kwarg) + self._check_output_size(bboxes, actual, size=size, **max_size_kwarg) torch.testing.assert_close(actual, expected) @pytest.mark.parametrize("interpolation", set(transforms.InterpolationMode) - set(INTERPOLATION_MODES)) @@ -684,7 +682,7 @@ def test_dispatcher_pil_antialias_warning(self): make_image_tensor, make_image_pil, make_image, - make_bounding_box, + make_bbox, make_segmentation_mask, make_detection_mask, make_video, @@ -753,7 +751,7 @@ def test_transform_unknown_size_error(self): make_image_tensor, make_image_pil, make_image, - make_bounding_box, + make_bbox, make_segmentation_mask, make_detection_mask, make_video, @@ -780,7 +778,7 @@ def test_noop(self, size, make_input): make_image_tensor, make_image_pil, make_image, - make_bounding_box, + make_bbox, make_segmentation_mask, make_detection_mask, make_video, @@ -805,16 +803,16 @@ class TestHorizontalFlip: def test_kernel_image_tensor(self, dtype, device): check_kernel(F.horizontal_flip_image_tensor, make_image(dtype=dtype, device=device)) - @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) + @pytest.mark.parametrize("format", list(datapoints.BBoxFormat)) @pytest.mark.parametrize("dtype", [torch.float32, torch.int64]) @pytest.mark.parametrize("device", cpu_and_cuda()) - def test_kernel_bounding_boxes(self, format, dtype, device): - bounding_boxes = make_bounding_box(format=format, dtype=dtype, device=device) + def test_kernel_bboxes(self, format, dtype, device): + bboxes = make_bbox(format=format, dtype=dtype, device=device) check_kernel( - F.horizontal_flip_bounding_boxes, - bounding_boxes, + F.horizontal_flip_bboxes, + bboxes, format=format, - spatial_size=bounding_boxes.spatial_size, + spatial_size=bboxes.spatial_size, ) @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_mask]) @@ -830,7 +828,7 @@ def test_kernel_video(self): (F.horizontal_flip_image_tensor, make_image_tensor), 
(F.horizontal_flip_image_pil, make_image_pil), (F.horizontal_flip_image_tensor, make_image), - (F.horizontal_flip_bounding_boxes, make_bounding_box), + (F.horizontal_flip_bboxes, make_bbox), (F.horizontal_flip_mask, make_segmentation_mask), (F.horizontal_flip_video, make_video), ], @@ -844,7 +842,7 @@ def test_dispatcher(self, kernel, make_input): (F.horizontal_flip_image_tensor, torch.Tensor), (F.horizontal_flip_image_pil, PIL.Image.Image), (F.horizontal_flip_image_tensor, datapoints.Image), - (F.horizontal_flip_bounding_boxes, datapoints.BoundingBoxes), + (F.horizontal_flip_bboxes, datapoints.BBoxes), (F.horizontal_flip_mask, datapoints.Mask), (F.horizontal_flip_video, datapoints.Video), ], @@ -854,7 +852,7 @@ def test_dispatcher_signature(self, kernel, input_type): @pytest.mark.parametrize( "make_input", - [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], + [make_image_tensor, make_image_pil, make_image, make_bbox, make_segmentation_mask, make_video], ) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_transform(self, make_input, device): @@ -871,39 +869,39 @@ def test_image_correctness(self, fn): torch.testing.assert_close(actual, expected) - def _reference_horizontal_flip_bounding_boxes(self, bounding_boxes): + def _reference_horizontal_flip_bboxes(self, bboxes): affine_matrix = np.array( [ - [-1, 0, bounding_boxes.spatial_size[1]], + [-1, 0, bboxes.spatial_size[1]], [0, 1, 0], ], - dtype="float64" if bounding_boxes.dtype == torch.float64 else "float32", + dtype="float64" if bboxes.dtype == torch.float64 else "float32", ) - expected_bboxes = reference_affine_bounding_boxes_helper( - bounding_boxes, - format=bounding_boxes.format, - spatial_size=bounding_boxes.spatial_size, + expected_bboxes = reference_affine_bboxes_helper( + bboxes, + format=bboxes.format, + spatial_size=bboxes.spatial_size, affine_matrix=affine_matrix, ) - return datapoints.BoundingBoxes.wrap_like(bounding_boxes, expected_bboxes) + return datapoints.BBoxes.wrap_like(bboxes, expected_bboxes) - @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) + @pytest.mark.parametrize("format", list(datapoints.BBoxFormat)) @pytest.mark.parametrize( "fn", [F.horizontal_flip, transform_cls_to_functional(transforms.RandomHorizontalFlip, p=1)] ) - def test_bounding_boxes_correctness(self, format, fn): - bounding_boxes = make_bounding_box(format=format) + def test_bboxes_correctness(self, format, fn): + bboxes = make_bbox(format=format) - actual = fn(bounding_boxes) - expected = self._reference_horizontal_flip_bounding_boxes(bounding_boxes) + actual = fn(bboxes) + expected = self._reference_horizontal_flip_bboxes(bboxes) torch.testing.assert_close(actual, expected) @pytest.mark.parametrize( "make_input", - [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], + [make_image_tensor, make_image_pil, make_image, make_bbox, make_segmentation_mask, make_video], ) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_transform_noop(self, make_input, device): @@ -986,16 +984,16 @@ def test_kernel_image_tensor(self, param, value, dtype, device): shear=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["shear"], center=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["center"], ) - @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) + @pytest.mark.parametrize("format", list(datapoints.BBoxFormat)) @pytest.mark.parametrize("dtype", [torch.float32, torch.int64]) @pytest.mark.parametrize("device", cpu_and_cuda()) - def 
test_kernel_bounding_boxes(self, param, value, format, dtype, device): - bounding_boxes = make_bounding_box(format=format, dtype=dtype, device=device) + def test_kernel_bboxes(self, param, value, format, dtype, device): + bboxes = make_bbox(format=format, dtype=dtype, device=device) self._check_kernel( - F.affine_bounding_boxes, - bounding_boxes, + F.affine_bboxes, + bboxes, format=format, - spatial_size=bounding_boxes.spatial_size, + spatial_size=bboxes.spatial_size, **{param: value}, check_scripted_vs_eager=not (param == "shear" and isinstance(value, (int, float))), ) @@ -1013,7 +1011,7 @@ def test_kernel_video(self): (F.affine_image_tensor, make_image_tensor), (F.affine_image_pil, make_image_pil), (F.affine_image_tensor, make_image), - (F.affine_bounding_boxes, make_bounding_box), + (F.affine_bboxes, make_bbox), (F.affine_mask, make_segmentation_mask), (F.affine_video, make_video), ], @@ -1027,7 +1025,7 @@ def test_dispatcher(self, kernel, make_input): (F.affine_image_tensor, torch.Tensor), (F.affine_image_pil, PIL.Image.Image), (F.affine_image_tensor, datapoints.Image), - (F.affine_bounding_boxes, datapoints.BoundingBoxes), + (F.affine_bboxes, datapoints.BBoxes), (F.affine_mask, datapoints.Mask), (F.affine_video, datapoints.Video), ], @@ -1037,7 +1035,7 @@ def test_dispatcher_signature(self, kernel, input_type): @pytest.mark.parametrize( "make_input", - [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], + [make_image_tensor, make_image_pil, make_image, make_bbox, make_segmentation_mask, make_video], ) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_transform(self, make_input, device): @@ -1131,43 +1129,43 @@ def _compute_affine_matrix(self, *, angle, translate, scale, shear, center): true_matrix = np.matmul(t_matrix, np.matmul(c_matrix, np.matmul(rss_matrix, c_matrix_inv))) return true_matrix - def _reference_affine_bounding_boxes(self, bounding_boxes, *, angle, translate, scale, shear, center): + def _reference_affine_bboxes(self, bboxes, *, angle, translate, scale, shear, center): if center is None: - center = [s * 0.5 for s in bounding_boxes.spatial_size[::-1]] + center = [s * 0.5 for s in bboxes.spatial_size[::-1]] affine_matrix = self._compute_affine_matrix( angle=angle, translate=translate, scale=scale, shear=shear, center=center ) affine_matrix = affine_matrix[:2, :] - expected_bboxes = reference_affine_bounding_boxes_helper( - bounding_boxes, - format=bounding_boxes.format, - spatial_size=bounding_boxes.spatial_size, + expected_bboxes = reference_affine_bboxes_helper( + bboxes, + format=bboxes.format, + spatial_size=bboxes.spatial_size, affine_matrix=affine_matrix, ) return expected_bboxes - @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) + @pytest.mark.parametrize("format", list(datapoints.BBoxFormat)) @pytest.mark.parametrize("angle", _CORRECTNESS_AFFINE_KWARGS["angle"]) @pytest.mark.parametrize("translate", _CORRECTNESS_AFFINE_KWARGS["translate"]) @pytest.mark.parametrize("scale", _CORRECTNESS_AFFINE_KWARGS["scale"]) @pytest.mark.parametrize("shear", _CORRECTNESS_AFFINE_KWARGS["shear"]) @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"]) - def test_functional_bounding_boxes_correctness(self, format, angle, translate, scale, shear, center): - bounding_boxes = make_bounding_box(format=format) + def test_functional_bboxes_correctness(self, format, angle, translate, scale, shear, center): + bboxes = make_bbox(format=format) actual = F.affine( - bounding_boxes, + bboxes, 
angle=angle, translate=translate, scale=scale, shear=shear, center=center, ) - expected = self._reference_affine_bounding_boxes( - bounding_boxes, + expected = self._reference_affine_bboxes( + bboxes, angle=angle, translate=translate, scale=scale, @@ -1177,21 +1175,21 @@ def test_functional_bounding_boxes_correctness(self, format, angle, translate, s torch.testing.assert_close(actual, expected) - @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) + @pytest.mark.parametrize("format", list(datapoints.BBoxFormat)) @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"]) @pytest.mark.parametrize("seed", list(range(5))) - def test_transform_bounding_boxes_correctness(self, format, center, seed): - bounding_boxes = make_bounding_box(format=format) + def test_transform_bboxes_correctness(self, format, center, seed): + bboxes = make_bbox(format=format) transform = transforms.RandomAffine(**self._CORRECTNESS_TRANSFORM_AFFINE_RANGES, center=center) torch.manual_seed(seed) - params = transform._get_params([bounding_boxes]) + params = transform._get_params([bboxes]) torch.manual_seed(seed) - actual = transform(bounding_boxes) + actual = transform(bboxes) - expected = self._reference_affine_bounding_boxes(bounding_boxes, **params, center=center) + expected = self._reference_affine_bboxes(bboxes, **params, center=center) torch.testing.assert_close(actual, expected) @@ -1284,16 +1282,16 @@ class TestVerticalFlip: def test_kernel_image_tensor(self, dtype, device): check_kernel(F.vertical_flip_image_tensor, make_image(dtype=dtype, device=device)) - @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) + @pytest.mark.parametrize("format", list(datapoints.BBoxFormat)) @pytest.mark.parametrize("dtype", [torch.float32, torch.int64]) @pytest.mark.parametrize("device", cpu_and_cuda()) - def test_kernel_bounding_boxes(self, format, dtype, device): - bounding_boxes = make_bounding_box(format=format, dtype=dtype, device=device) + def test_kernel_bboxes(self, format, dtype, device): + bboxes = make_bbox(format=format, dtype=dtype, device=device) check_kernel( - F.vertical_flip_bounding_boxes, - bounding_boxes, + F.vertical_flip_bboxes, + bboxes, format=format, - spatial_size=bounding_boxes.spatial_size, + spatial_size=bboxes.spatial_size, ) @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_mask]) @@ -1309,7 +1307,7 @@ def test_kernel_video(self): (F.vertical_flip_image_tensor, make_image_tensor), (F.vertical_flip_image_pil, make_image_pil), (F.vertical_flip_image_tensor, make_image), - (F.vertical_flip_bounding_boxes, make_bounding_box), + (F.vertical_flip_bboxes, make_bbox), (F.vertical_flip_mask, make_segmentation_mask), (F.vertical_flip_video, make_video), ], @@ -1323,7 +1321,7 @@ def test_dispatcher(self, kernel, make_input): (F.vertical_flip_image_tensor, torch.Tensor), (F.vertical_flip_image_pil, PIL.Image.Image), (F.vertical_flip_image_tensor, datapoints.Image), - (F.vertical_flip_bounding_boxes, datapoints.BoundingBoxes), + (F.vertical_flip_bboxes, datapoints.BBoxes), (F.vertical_flip_mask, datapoints.Mask), (F.vertical_flip_video, datapoints.Video), ], @@ -1333,7 +1331,7 @@ def test_dispatcher_signature(self, kernel, input_type): @pytest.mark.parametrize( "make_input", - [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], + [make_image_tensor, make_image_pil, make_image, make_bbox, make_segmentation_mask, make_video], ) @pytest.mark.parametrize("device", cpu_and_cuda()) def 
test_transform(self, make_input, device): @@ -1348,37 +1346,37 @@ def test_image_correctness(self, fn): torch.testing.assert_close(actual, expected) - def _reference_vertical_flip_bounding_boxes(self, bounding_boxes): + def _reference_vertical_flip_bboxes(self, bboxes): affine_matrix = np.array( [ [1, 0, 0], - [0, -1, bounding_boxes.spatial_size[0]], + [0, -1, bboxes.spatial_size[0]], ], - dtype="float64" if bounding_boxes.dtype == torch.float64 else "float32", + dtype="float64" if bboxes.dtype == torch.float64 else "float32", ) - expected_bboxes = reference_affine_bounding_boxes_helper( - bounding_boxes, - format=bounding_boxes.format, - spatial_size=bounding_boxes.spatial_size, + expected_bboxes = reference_affine_bboxes_helper( + bboxes, + format=bboxes.format, + spatial_size=bboxes.spatial_size, affine_matrix=affine_matrix, ) - return datapoints.BoundingBoxes.wrap_like(bounding_boxes, expected_bboxes) + return datapoints.BBoxes.wrap_like(bboxes, expected_bboxes) - @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) + @pytest.mark.parametrize("format", list(datapoints.BBoxFormat)) @pytest.mark.parametrize("fn", [F.vertical_flip, transform_cls_to_functional(transforms.RandomVerticalFlip, p=1)]) - def test_bounding_boxes_correctness(self, format, fn): - bounding_boxes = make_bounding_box(format=format) + def test_bboxes_correctness(self, format, fn): + bboxes = make_bbox(format=format) - actual = fn(bounding_boxes) - expected = self._reference_vertical_flip_bounding_boxes(bounding_boxes) + actual = fn(bboxes) + expected = self._reference_vertical_flip_bboxes(bboxes) torch.testing.assert_close(actual, expected) @pytest.mark.parametrize( "make_input", - [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], + [make_image_tensor, make_image_pil, make_image, make_bbox, make_segmentation_mask, make_video], ) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_transform_noop(self, make_input, device): @@ -1435,21 +1433,21 @@ def test_kernel_image_tensor(self, param, value, dtype, device): expand=[False, True], center=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["center"], ) - @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) + @pytest.mark.parametrize("format", list(datapoints.BBoxFormat)) @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) @pytest.mark.parametrize("device", cpu_and_cuda()) - def test_kernel_bounding_boxes(self, param, value, format, dtype, device): + def test_kernel_bboxes(self, param, value, format, dtype, device): kwargs = {param: value} if param != "angle": kwargs["angle"] = self._MINIMAL_AFFINE_KWARGS["angle"] - bounding_boxes = make_bounding_box(format=format, dtype=dtype, device=device) + bboxes = make_bbox(format=format, dtype=dtype, device=device) check_kernel( - F.rotate_bounding_boxes, - bounding_boxes, + F.rotate_bboxes, + bboxes, format=format, - spatial_size=bounding_boxes.spatial_size, + spatial_size=bboxes.spatial_size, **kwargs, ) @@ -1466,7 +1464,7 @@ def test_kernel_video(self): (F.rotate_image_tensor, make_image_tensor), (F.rotate_image_pil, make_image_pil), (F.rotate_image_tensor, make_image), - (F.rotate_bounding_boxes, make_bounding_box), + (F.rotate_bboxes, make_bbox), (F.rotate_mask, make_segmentation_mask), (F.rotate_video, make_video), ], @@ -1480,7 +1478,7 @@ def test_dispatcher(self, kernel, make_input): (F.rotate_image_tensor, torch.Tensor), (F.rotate_image_pil, PIL.Image.Image), (F.rotate_image_tensor, datapoints.Image), - (F.rotate_bounding_boxes, 
datapoints.BoundingBoxes), + (F.rotate_bboxes, datapoints.BBoxes), (F.rotate_mask, datapoints.Mask), (F.rotate_video, datapoints.Video), ], @@ -1490,7 +1488,7 @@ def test_dispatcher_signature(self, kernel, input_type): @pytest.mark.parametrize( "make_input", - [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], + [make_image_tensor, make_image_pil, make_image, make_bbox, make_segmentation_mask, make_video], ) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_transform(self, make_input, device): @@ -1549,13 +1547,13 @@ def test_transform_image_correctness(self, center, interpolation, expand, fill, mae = (actual.float() - expected.float()).abs().mean() assert mae < 1 if interpolation is transforms.InterpolationMode.NEAREST else 6 - def _reference_rotate_bounding_boxes(self, bounding_boxes, *, angle, expand, center): + def _reference_rotate_bboxes(self, bboxes, *, angle, expand, center): # FIXME if expand: raise ValueError("This reference currently does not support expand=True") if center is None: - center = [s * 0.5 for s in bounding_boxes.spatial_size[::-1]] + center = [s * 0.5 for s in bboxes.spatial_size[::-1]] a = np.cos(angle * np.pi / 180.0) b = np.sin(angle * np.pi / 180.0) @@ -1566,48 +1564,48 @@ def _reference_rotate_bounding_boxes(self, bounding_boxes, *, angle, expand, cen [a, b, cx - cx * a - b * cy], [-b, a, cy + cx * b - a * cy], ], - dtype="float64" if bounding_boxes.dtype == torch.float64 else "float32", + dtype="float64" if bboxes.dtype == torch.float64 else "float32", ) - expected_bboxes = reference_affine_bounding_boxes_helper( - bounding_boxes, - format=bounding_boxes.format, - spatial_size=bounding_boxes.spatial_size, + expected_bboxes = reference_affine_bboxes_helper( + bboxes, + format=bboxes.format, + spatial_size=bboxes.spatial_size, affine_matrix=affine_matrix, ) return expected_bboxes - @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) + @pytest.mark.parametrize("format", list(datapoints.BBoxFormat)) @pytest.mark.parametrize("angle", _CORRECTNESS_AFFINE_KWARGS["angle"]) # TODO: add support for expand=True in the reference @pytest.mark.parametrize("expand", [False]) @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"]) - def test_functional_bounding_boxes_correctness(self, format, angle, expand, center): - bounding_boxes = make_bounding_box(format=format) + def test_functional_bboxes_correctness(self, format, angle, expand, center): + bboxes = make_bbox(format=format) - actual = F.rotate(bounding_boxes, angle=angle, expand=expand, center=center) - expected = self._reference_rotate_bounding_boxes(bounding_boxes, angle=angle, expand=expand, center=center) + actual = F.rotate(bboxes, angle=angle, expand=expand, center=center) + expected = self._reference_rotate_bboxes(bboxes, angle=angle, expand=expand, center=center) torch.testing.assert_close(actual, expected) - @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) + @pytest.mark.parametrize("format", list(datapoints.BBoxFormat)) # TODO: add support for expand=True in the reference @pytest.mark.parametrize("expand", [False]) @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"]) @pytest.mark.parametrize("seed", list(range(5))) - def test_transform_bounding_boxes_correctness(self, format, expand, center, seed): - bounding_boxes = make_bounding_box(format=format) + def test_transform_bboxes_correctness(self, format, expand, center, seed): + bboxes = make_bbox(format=format) transform 
= transforms.RandomRotation(**self._CORRECTNESS_TRANSFORM_AFFINE_RANGES, expand=expand, center=center) torch.manual_seed(seed) - params = transform._get_params([bounding_boxes]) + params = transform._get_params([bboxes]) torch.manual_seed(seed) - actual = transform(bounding_boxes) + actual = transform(bboxes) - expected = self._reference_rotate_bounding_boxes(bounding_boxes, **params, expand=expand, center=center) + expected = self._reference_rotate_bboxes(bboxes, **params, expand=expand, center=center) torch.testing.assert_close(actual, expected) @@ -1759,7 +1757,7 @@ def test_dispatcher(self, kernel, make_input, input_dtype, output_dtype, device, @pytest.mark.parametrize( "make_input", - [make_image_tensor, make_image, make_bounding_box, make_segmentation_mask, make_video], + [make_image_tensor, make_image, make_bbox, make_segmentation_mask, make_video], ) @pytest.mark.parametrize("input_dtype", [torch.float32, torch.float64, torch.uint8]) @pytest.mark.parametrize("output_dtype", [torch.float32, torch.float64, torch.uint8]) @@ -1834,7 +1832,7 @@ def make_inpt_with_bbox_and_mask(self, make_input): mask_dtype = torch.bool sample = { "inpt": make_input(size=(H, W), dtype=inpt_dtype), - "bbox": make_bounding_box(size=(H, W), dtype=bbox_dtype), + "bbox": make_bbox(size=(H, W), dtype=bbox_dtype), "mask": make_detection_mask(size=(H, W), dtype=mask_dtype), } @@ -1988,7 +1986,7 @@ def test_error(self, T): for input_with_bad_type in ( F.to_pil_image(imgs[0]), datapoints.Mask(torch.rand(12, 12)), - datapoints.BoundingBoxes(torch.rand(2, 4), format="XYXY", spatial_size=12), + datapoints.BBoxes(torch.rand(2, 4), format="XYXY", spatial_size=12), ): with pytest.raises(ValueError, match="does not support PIL images, "): cutmix_mixup(input_with_bad_type) diff --git a/test/test_transforms_v2_utils.py b/test/test_transforms_v2_utils.py index 58c8bfd5815..5bbc1172d49 100644 --- a/test/test_transforms_v2_utils.py +++ b/test/test_transforms_v2_utils.py @@ -4,7 +4,7 @@ import torch import torchvision.transforms.v2.utils -from common_utils import make_bounding_box, make_detection_mask, make_image +from common_utils import make_bbox, make_detection_mask, make_image from torchvision import datapoints from torchvision.transforms.v2.functional import to_image_pil @@ -12,7 +12,7 @@ IMAGE = make_image(color_space="RGB") -BOUNDING_BOX = make_bounding_box(format=datapoints.BoundingBoxFormat.XYXY, spatial_size=IMAGE.spatial_size) +BOUNDING_BOX = make_bbox(format=datapoints.BBoxFormat.XYXY, spatial_size=IMAGE.spatial_size) MASK = make_detection_mask(size=IMAGE.spatial_size) @@ -20,20 +20,20 @@ ("sample", "types", "expected"), [ ((IMAGE, BOUNDING_BOX, MASK), (datapoints.Image,), True), - ((IMAGE, BOUNDING_BOX, MASK), (datapoints.BoundingBoxes,), True), + ((IMAGE, BOUNDING_BOX, MASK), (datapoints.BBoxes,), True), ((IMAGE, BOUNDING_BOX, MASK), (datapoints.Mask,), True), - ((IMAGE, BOUNDING_BOX, MASK), (datapoints.Image, datapoints.BoundingBoxes), True), + ((IMAGE, BOUNDING_BOX, MASK), (datapoints.Image, datapoints.BBoxes), True), ((IMAGE, BOUNDING_BOX, MASK), (datapoints.Image, datapoints.Mask), True), - ((IMAGE, BOUNDING_BOX, MASK), (datapoints.BoundingBoxes, datapoints.Mask), True), - ((MASK,), (datapoints.Image, datapoints.BoundingBoxes), False), + ((IMAGE, BOUNDING_BOX, MASK), (datapoints.BBoxes, datapoints.Mask), True), + ((MASK,), (datapoints.Image, datapoints.BBoxes), False), ((BOUNDING_BOX,), (datapoints.Image, datapoints.Mask), False), - ((IMAGE,), (datapoints.BoundingBoxes, datapoints.Mask), False), + ((IMAGE,), 
(datapoints.BBoxes, datapoints.Mask), False), ( (IMAGE, BOUNDING_BOX, MASK), - (datapoints.Image, datapoints.BoundingBoxes, datapoints.Mask), + (datapoints.Image, datapoints.BBoxes, datapoints.Mask), True, ), - ((), (datapoints.Image, datapoints.BoundingBoxes, datapoints.Mask), False), + ((), (datapoints.Image, datapoints.BBoxes, datapoints.Mask), False), ((IMAGE, BOUNDING_BOX, MASK), (lambda obj: isinstance(obj, datapoints.Image),), True), ((IMAGE, BOUNDING_BOX, MASK), (lambda _: False,), False), ((IMAGE, BOUNDING_BOX, MASK), (lambda _: True,), True), @@ -58,30 +58,30 @@ def test_has_any(sample, types, expected): ("sample", "types", "expected"), [ ((IMAGE, BOUNDING_BOX, MASK), (datapoints.Image,), True), - ((IMAGE, BOUNDING_BOX, MASK), (datapoints.BoundingBoxes,), True), + ((IMAGE, BOUNDING_BOX, MASK), (datapoints.BBoxes,), True), ((IMAGE, BOUNDING_BOX, MASK), (datapoints.Mask,), True), - ((IMAGE, BOUNDING_BOX, MASK), (datapoints.Image, datapoints.BoundingBoxes), True), + ((IMAGE, BOUNDING_BOX, MASK), (datapoints.Image, datapoints.BBoxes), True), ((IMAGE, BOUNDING_BOX, MASK), (datapoints.Image, datapoints.Mask), True), - ((IMAGE, BOUNDING_BOX, MASK), (datapoints.BoundingBoxes, datapoints.Mask), True), + ((IMAGE, BOUNDING_BOX, MASK), (datapoints.BBoxes, datapoints.Mask), True), ( (IMAGE, BOUNDING_BOX, MASK), - (datapoints.Image, datapoints.BoundingBoxes, datapoints.Mask), + (datapoints.Image, datapoints.BBoxes, datapoints.Mask), True, ), - ((BOUNDING_BOX, MASK), (datapoints.Image, datapoints.BoundingBoxes), False), + ((BOUNDING_BOX, MASK), (datapoints.Image, datapoints.BBoxes), False), ((BOUNDING_BOX, MASK), (datapoints.Image, datapoints.Mask), False), - ((IMAGE, MASK), (datapoints.BoundingBoxes, datapoints.Mask), False), + ((IMAGE, MASK), (datapoints.BBoxes, datapoints.Mask), False), ( (IMAGE, BOUNDING_BOX, MASK), - (datapoints.Image, datapoints.BoundingBoxes, datapoints.Mask), + (datapoints.Image, datapoints.BBoxes, datapoints.Mask), True, ), - ((BOUNDING_BOX, MASK), (datapoints.Image, datapoints.BoundingBoxes, datapoints.Mask), False), - ((IMAGE, MASK), (datapoints.Image, datapoints.BoundingBoxes, datapoints.Mask), False), - ((IMAGE, BOUNDING_BOX), (datapoints.Image, datapoints.BoundingBoxes, datapoints.Mask), False), + ((BOUNDING_BOX, MASK), (datapoints.Image, datapoints.BBoxes, datapoints.Mask), False), + ((IMAGE, MASK), (datapoints.Image, datapoints.BBoxes, datapoints.Mask), False), + ((IMAGE, BOUNDING_BOX), (datapoints.Image, datapoints.BBoxes, datapoints.Mask), False), ( (IMAGE, BOUNDING_BOX, MASK), - (lambda obj: isinstance(obj, (datapoints.Image, datapoints.BoundingBoxes, datapoints.Mask)),), + (lambda obj: isinstance(obj, (datapoints.Image, datapoints.BBoxes, datapoints.Mask)),), True, ), ((IMAGE, BOUNDING_BOX, MASK), (lambda _: False,), False), diff --git a/test/test_utils.py b/test/test_utils.py index b13bd0f0f5b..f3c58180be0 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -98,7 +98,7 @@ def test_draw_boxes(): boxes_cp = boxes.clone() labels = ["a", "b", "c", "d"] colors = ["green", "#FF00FF", (0, 255, 0), "red"] - result = utils.draw_bounding_boxes(img, boxes, labels=labels, colors=colors, fill=True) + result = utils.draw_bboxes(img, boxes, labels=labels, colors=colors, fill=True) path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "fakedata", "draw_boxes_util.png") if not os.path.exists(path): @@ -118,17 +118,17 @@ def test_draw_boxes(): @pytest.mark.parametrize("colors", [None, ["red", "blue", "#FF00FF", (1, 34, 122)], "red", "#FF00FF", (1, 
34, 122)]) def test_draw_boxes_colors(colors): img = torch.full((3, 100, 100), 0, dtype=torch.uint8) - utils.draw_bounding_boxes(img, boxes, fill=False, width=7, colors=colors) + utils.draw_bboxes(img, boxes, fill=False, width=7, colors=colors) with pytest.raises(ValueError, match="Number of colors must be equal or larger than the number of objects"): - utils.draw_bounding_boxes(image=img, boxes=boxes, colors=[]) + utils.draw_bboxes(image=img, boxes=boxes, colors=[]) def test_draw_boxes_vanilla(): img = torch.full((3, 100, 100), 0, dtype=torch.uint8) img_cp = img.clone() boxes_cp = boxes.clone() - result = utils.draw_bounding_boxes(img, boxes, fill=False, width=7, colors="white") + result = utils.draw_bboxes(img, boxes, fill=False, width=7, colors="white") path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "fakedata", "draw_boxes_vanilla.png") if not os.path.exists(path): @@ -145,7 +145,7 @@ def test_draw_boxes_vanilla(): def test_draw_boxes_grayscale(): img = torch.full((1, 4, 4), fill_value=255, dtype=torch.uint8) boxes = torch.tensor([[0, 0, 3, 3]], dtype=torch.int64) - bboxed_img = utils.draw_bounding_boxes(image=img, boxes=boxes, colors=["#1BBC9B"]) + bboxed_img = utils.draw_bboxes(image=img, boxes=boxes, colors=["#1BBC9B"]) assert bboxed_img.size(0) == 3 @@ -160,33 +160,33 @@ def test_draw_invalid_boxes(): colors_wrong = ["pink", "blue"] with pytest.raises(TypeError, match="Tensor expected"): - utils.draw_bounding_boxes(img_tp, boxes) + utils.draw_bboxes(img_tp, boxes) with pytest.raises(ValueError, match="Tensor uint8 expected"): - utils.draw_bounding_boxes(img_wrong1, boxes) + utils.draw_bboxes(img_wrong1, boxes) with pytest.raises(ValueError, match="Pass individual images, not batches"): - utils.draw_bounding_boxes(img_wrong2, boxes) + utils.draw_bboxes(img_wrong2, boxes) with pytest.raises(ValueError, match="Only grayscale and RGB images are supported"): - utils.draw_bounding_boxes(img_wrong2[0][:2], boxes) + utils.draw_bboxes(img_wrong2[0][:2], boxes) with pytest.raises(ValueError, match="Number of boxes"): - utils.draw_bounding_boxes(img_correct, boxes, labels_wrong) + utils.draw_bboxes(img_correct, boxes, labels_wrong) with pytest.raises(ValueError, match="Number of colors"): - utils.draw_bounding_boxes(img_correct, boxes, colors=colors_wrong) + utils.draw_bboxes(img_correct, boxes, colors=colors_wrong) with pytest.raises(ValueError, match="Boxes need to be in"): - utils.draw_bounding_boxes(img_correct, boxes_wrong) + utils.draw_bboxes(img_correct, boxes_wrong) def test_draw_boxes_warning(): img = torch.full((3, 100, 100), 255, dtype=torch.uint8) with pytest.warns(UserWarning, match=re.escape("Argument 'font_size' will be ignored since 'font' is not set.")): - utils.draw_bounding_boxes(img, boxes, font_size=11) + utils.draw_bboxes(img, boxes, font_size=11) def test_draw_no_boxes(): img = torch.full((3, 100, 100), 0, dtype=torch.uint8) boxes = torch.full((0, 4), 0, dtype=torch.float) with pytest.warns(UserWarning, match=re.escape("boxes doesn't contain any box. 
No box was drawn")): - res = utils.draw_bounding_boxes(img, boxes) + res = utils.draw_bboxes(img, boxes) # Check that the function didn't change the image assert res.eq(img).all() diff --git a/test/transforms_v2_dispatcher_infos.py b/test/transforms_v2_dispatcher_infos.py index 239954dda68..0223cbf1fdb 100644 --- a/test/transforms_v2_dispatcher_infos.py +++ b/test/transforms_v2_dispatcher_infos.py @@ -143,7 +143,7 @@ def fill_sequence_needs_broadcast(args_kwargs): kernels={ datapoints.Image: F.crop_image_tensor, datapoints.Video: F.crop_video, - datapoints.BoundingBoxes: F.crop_bounding_boxes, + datapoints.BBoxes: F.crop_bboxes, datapoints.Mask: F.crop_mask, }, pil_kernel_info=PILKernelInfo(F.crop_image_pil, kernel_name="crop_image_pil"), @@ -153,7 +153,7 @@ def fill_sequence_needs_broadcast(args_kwargs): kernels={ datapoints.Image: F.resized_crop_image_tensor, datapoints.Video: F.resized_crop_video, - datapoints.BoundingBoxes: F.resized_crop_bounding_boxes, + datapoints.BBoxes: F.resized_crop_bboxes, datapoints.Mask: F.resized_crop_mask, }, pil_kernel_info=PILKernelInfo(F.resized_crop_image_pil), @@ -163,7 +163,7 @@ def fill_sequence_needs_broadcast(args_kwargs): kernels={ datapoints.Image: F.pad_image_tensor, datapoints.Video: F.pad_video, - datapoints.BoundingBoxes: F.pad_bounding_boxes, + datapoints.BBoxes: F.pad_bboxes, datapoints.Mask: F.pad_mask, }, pil_kernel_info=PILKernelInfo(F.pad_image_pil, kernel_name="pad_image_pil"), @@ -185,7 +185,7 @@ def fill_sequence_needs_broadcast(args_kwargs): kernels={ datapoints.Image: F.perspective_image_tensor, datapoints.Video: F.perspective_video, - datapoints.BoundingBoxes: F.perspective_bounding_boxes, + datapoints.BBoxes: F.perspective_bboxes, datapoints.Mask: F.perspective_mask, }, pil_kernel_info=PILKernelInfo(F.perspective_image_pil), @@ -199,7 +199,7 @@ def fill_sequence_needs_broadcast(args_kwargs): kernels={ datapoints.Image: F.elastic_image_tensor, datapoints.Video: F.elastic_video, - datapoints.BoundingBoxes: F.elastic_bounding_boxes, + datapoints.BBoxes: F.elastic_bboxes, datapoints.Mask: F.elastic_mask, }, pil_kernel_info=PILKernelInfo(F.elastic_image_pil), @@ -210,7 +210,7 @@ def fill_sequence_needs_broadcast(args_kwargs): kernels={ datapoints.Image: F.center_crop_image_tensor, datapoints.Video: F.center_crop_video, - datapoints.BoundingBoxes: F.center_crop_bounding_boxes, + datapoints.BBoxes: F.center_crop_bboxes, datapoints.Mask: F.center_crop_mask, }, pil_kernel_info=PILKernelInfo(F.center_crop_image_pil), @@ -374,15 +374,15 @@ def fill_sequence_needs_broadcast(args_kwargs): ], ), DispatcherInfo( - F.clamp_bounding_boxes, - kernels={datapoints.BoundingBoxes: F.clamp_bounding_boxes}, + F.clamp_bboxes, + kernels={datapoints.BBoxes: F.clamp_bboxes}, test_marks=[ skip_dispatch_datapoint, ], ), DispatcherInfo( - F.convert_format_bounding_boxes, - kernels={datapoints.BoundingBoxes: F.convert_format_bounding_boxes}, + F.convert_format_bboxes, + kernels={datapoints.BBoxes: F.convert_format_bboxes}, test_marks=[ skip_dispatch_datapoint, ], diff --git a/test/transforms_v2_kernel_infos.py b/test/transforms_v2_kernel_infos.py index 6f1c91ac62a..bf87364ec82 100644 --- a/test/transforms_v2_kernel_infos.py +++ b/test/transforms_v2_kernel_infos.py @@ -14,8 +14,8 @@ get_num_channels, ImageLoader, InfoBase, - make_bounding_box_loader, - make_bounding_box_loaders, + make_bbox_loader, + make_bbox_loaders, make_detection_mask_loader, make_image_loader, make_image_loaders, @@ -184,16 +184,16 @@ def float32_vs_uint8_fill_adapter(other_args, kwargs): 
return other_args, dict(kwargs, fill=fill) -def reference_affine_bounding_boxes_helper(bounding_boxes, *, format, spatial_size, affine_matrix): +def reference_affine_bboxes_helper(bboxes, *, format, spatial_size, affine_matrix): def transform(bbox, affine_matrix_, format_, spatial_size_): # Go to float before converting to prevent precision loss in case of CXCYWH -> XYXY and W or H is 1 in_dtype = bbox.dtype if not torch.is_floating_point(bbox): bbox = bbox.float() - bbox_xyxy = F.convert_format_bounding_boxes( + bbox_xyxy = F.convert_format_bboxes( bbox.as_subclass(torch.Tensor), old_format=format_, - new_format=datapoints.BoundingBoxFormat.XYXY, + new_format=datapoints.BBoxFormat.XYXY, inplace=True, ) points = np.array( @@ -214,18 +214,18 @@ def transform(bbox, affine_matrix_, format_, spatial_size_): ], dtype=bbox_xyxy.dtype, ) - out_bbox = F.convert_format_bounding_boxes( - out_bbox, old_format=datapoints.BoundingBoxFormat.XYXY, new_format=format_, inplace=True + out_bbox = F.convert_format_bboxes( + out_bbox, old_format=datapoints.BBoxFormat.XYXY, new_format=format_, inplace=True ) # It is important to clamp before casting, especially for CXCYWH format, dtype=int64 - out_bbox = F.clamp_bounding_boxes(out_bbox, format=format_, spatial_size=spatial_size_) + out_bbox = F.clamp_bboxes(out_bbox, format=format_, spatial_size=spatial_size_) out_bbox = out_bbox.to(dtype=in_dtype) return out_bbox - if bounding_boxes.ndim < 2: - bounding_boxes = [bounding_boxes] + if bboxes.ndim < 2: + bboxes = [bboxes] - expected_bboxes = [transform(bbox, affine_matrix, format, spatial_size) for bbox in bounding_boxes] + expected_bboxes = [transform(bbox, affine_matrix, format, spatial_size) for bbox in bboxes] if len(expected_bboxes) > 1: expected_bboxes = torch.stack(expected_bboxes) else: @@ -234,30 +234,30 @@ def transform(bbox, affine_matrix_, format_, spatial_size_): return expected_bboxes -def sample_inputs_convert_format_bounding_boxes(): - formats = list(datapoints.BoundingBoxFormat) - for bounding_boxes_loader, new_format in itertools.product(make_bounding_box_loaders(formats=formats), formats): - yield ArgsKwargs(bounding_boxes_loader, old_format=bounding_boxes_loader.format, new_format=new_format) +def sample_inputs_convert_format_bboxes(): + formats = list(datapoints.BBoxFormat) + for bboxes_loader, new_format in itertools.product(make_bbox_loaders(formats=formats), formats): + yield ArgsKwargs(bboxes_loader, old_format=bboxes_loader.format, new_format=new_format) -def reference_convert_format_bounding_boxes(bounding_boxes, old_format, new_format): - return torchvision.ops.box_convert( - bounding_boxes, in_fmt=old_format.name.lower(), out_fmt=new_format.name.lower() - ).to(bounding_boxes.dtype) +def reference_convert_format_bboxes(bboxes, old_format, new_format): + return torchvision.ops.box_convert(bboxes, in_fmt=old_format.name.lower(), out_fmt=new_format.name.lower()).to( + bboxes.dtype + ) -def reference_inputs_convert_format_bounding_boxes(): - for args_kwargs in sample_inputs_convert_format_bounding_boxes(): +def reference_inputs_convert_format_bboxes(): + for args_kwargs in sample_inputs_convert_format_bboxes(): if len(args_kwargs.args[0].shape) == 2: yield args_kwargs KERNEL_INFOS.append( KernelInfo( - F.convert_format_bounding_boxes, - sample_inputs_fn=sample_inputs_convert_format_bounding_boxes, - reference_fn=reference_convert_format_bounding_boxes, - reference_inputs_fn=reference_inputs_convert_format_bounding_boxes, + F.convert_format_bboxes, + 
sample_inputs_fn=sample_inputs_convert_format_bboxes, + reference_fn=reference_convert_format_bboxes, + reference_inputs_fn=reference_inputs_convert_format_bboxes, logs_usage=True, closeness_kwargs={ (("TestKernels", "test_against_reference"), torch.int64, "cpu"): dict(atol=1, rtol=0), @@ -290,11 +290,9 @@ def reference_inputs_crop_image_tensor(): yield ArgsKwargs(image_loader, **params) -def sample_inputs_crop_bounding_boxes(): - for bounding_boxes_loader, params in itertools.product( - make_bounding_box_loaders(), [_CROP_PARAMS[0], _CROP_PARAMS[-1]] - ): - yield ArgsKwargs(bounding_boxes_loader, format=bounding_boxes_loader.format, **params) +def sample_inputs_crop_bboxes(): + for bboxes_loader, params in itertools.product(make_bbox_loaders(), [_CROP_PARAMS[0], _CROP_PARAMS[-1]]): + yield ArgsKwargs(bboxes_loader, format=bboxes_loader.format, **params) def sample_inputs_crop_mask(): @@ -312,27 +310,27 @@ def sample_inputs_crop_video(): yield ArgsKwargs(video_loader, top=4, left=3, height=7, width=8) -def reference_crop_bounding_boxes(bounding_boxes, *, format, top, left, height, width): +def reference_crop_bboxes(bboxes, *, format, top, left, height, width): affine_matrix = np.array( [ [1, 0, -left], [0, 1, -top], ], - dtype="float64" if bounding_boxes.dtype == torch.float64 else "float32", + dtype="float64" if bboxes.dtype == torch.float64 else "float32", ) spatial_size = (height, width) - expected_bboxes = reference_affine_bounding_boxes_helper( - bounding_boxes, format=format, spatial_size=spatial_size, affine_matrix=affine_matrix + expected_bboxes = reference_affine_bboxes_helper( + bboxes, format=format, spatial_size=spatial_size, affine_matrix=affine_matrix ) return expected_bboxes, spatial_size -def reference_inputs_crop_bounding_boxes(): - for bounding_boxes_loader, params in itertools.product( - make_bounding_box_loaders(extra_dims=((), (4,))), [_CROP_PARAMS[0], _CROP_PARAMS[-1]] +def reference_inputs_crop_bboxes(): + for bboxes_loader, params in itertools.product( + make_bbox_loaders(extra_dims=((), (4,))), [_CROP_PARAMS[0], _CROP_PARAMS[-1]] ): - yield ArgsKwargs(bounding_boxes_loader, format=bounding_boxes_loader.format, **params) + yield ArgsKwargs(bboxes_loader, format=bboxes_loader.format, **params) KERNEL_INFOS.extend( @@ -346,10 +344,10 @@ def reference_inputs_crop_bounding_boxes(): float32_vs_uint8=True, ), KernelInfo( - F.crop_bounding_boxes, - sample_inputs_fn=sample_inputs_crop_bounding_boxes, - reference_fn=reference_crop_bounding_boxes, - reference_inputs_fn=reference_inputs_crop_bounding_boxes, + F.crop_bboxes, + sample_inputs_fn=sample_inputs_crop_bboxes, + reference_fn=reference_crop_bboxes, + reference_inputs_fn=reference_inputs_crop_bboxes, ), KernelInfo( F.crop_mask, @@ -406,9 +404,9 @@ def reference_inputs_resized_crop_image_tensor(): ) -def sample_inputs_resized_crop_bounding_boxes(): - for bounding_boxes_loader in make_bounding_box_loaders(): - yield ArgsKwargs(bounding_boxes_loader, format=bounding_boxes_loader.format, **_RESIZED_CROP_PARAMS[0]) +def sample_inputs_resized_crop_bboxes(): + for bboxes_loader in make_bbox_loaders(): + yield ArgsKwargs(bboxes_loader, format=bboxes_loader.format, **_RESIZED_CROP_PARAMS[0]) def sample_inputs_resized_crop_mask(): @@ -436,8 +434,8 @@ def sample_inputs_resized_crop_video(): }, ), KernelInfo( - F.resized_crop_bounding_boxes, - sample_inputs_fn=sample_inputs_resized_crop_bounding_boxes, + F.resized_crop_bboxes, + sample_inputs_fn=sample_inputs_resized_crop_bboxes, ), KernelInfo( F.resized_crop_mask, @@ -500,14 
+498,14 @@ def reference_inputs_pad_image_tensor(): yield ArgsKwargs(image_loader, fill=fill, **params) -def sample_inputs_pad_bounding_boxes(): - for bounding_boxes_loader, padding in itertools.product( - make_bounding_box_loaders(), [1, (1,), (1, 2), (1, 2, 3, 4), [1], [1, 2], [1, 2, 3, 4]] +def sample_inputs_pad_bboxes(): + for bboxes_loader, padding in itertools.product( + make_bbox_loaders(), [1, (1,), (1, 2), (1, 2, 3, 4), [1], [1, 2], [1, 2, 3, 4]] ): yield ArgsKwargs( - bounding_boxes_loader, - format=bounding_boxes_loader.format, - spatial_size=bounding_boxes_loader.spatial_size, + bboxes_loader, + format=bboxes_loader.format, + spatial_size=bboxes_loader.spatial_size, padding=padding, padding_mode="constant", ) @@ -530,7 +528,7 @@ def sample_inputs_pad_video(): yield ArgsKwargs(video_loader, padding=[1]) -def reference_pad_bounding_boxes(bounding_boxes, *, format, spatial_size, padding, padding_mode): +def reference_pad_bboxes(bboxes, *, format, spatial_size, padding, padding_mode): left, right, top, bottom = _parse_pad_padding(padding) @@ -539,26 +537,26 @@ def reference_pad_bounding_boxes(bounding_boxes, *, format, spatial_size, paddin [1, 0, left], [0, 1, top], ], - dtype="float64" if bounding_boxes.dtype == torch.float64 else "float32", + dtype="float64" if bboxes.dtype == torch.float64 else "float32", ) height = spatial_size[0] + top + bottom width = spatial_size[1] + left + right - expected_bboxes = reference_affine_bounding_boxes_helper( - bounding_boxes, format=format, spatial_size=(height, width), affine_matrix=affine_matrix + expected_bboxes = reference_affine_bboxes_helper( + bboxes, format=format, spatial_size=(height, width), affine_matrix=affine_matrix ) return expected_bboxes, (height, width) -def reference_inputs_pad_bounding_boxes(): - for bounding_boxes_loader, padding in itertools.product( - make_bounding_box_loaders(extra_dims=((), (4,))), [1, (1,), (1, 2), (1, 2, 3, 4), [1], [1, 2], [1, 2, 3, 4]] +def reference_inputs_pad_bboxes(): + for bboxes_loader, padding in itertools.product( + make_bbox_loaders(extra_dims=((), (4,))), [1, (1,), (1, 2), (1, 2, 3, 4), [1], [1, 2], [1, 2, 3, 4]] ): yield ArgsKwargs( - bounding_boxes_loader, - format=bounding_boxes_loader.format, - spatial_size=bounding_boxes_loader.spatial_size, + bboxes_loader, + format=bboxes_loader.format, + spatial_size=bboxes_loader.spatial_size, padding=padding, padding_mode="constant", ) @@ -591,10 +589,10 @@ def pad_xfail_jit_fill_condition(args_kwargs): ], ), KernelInfo( - F.pad_bounding_boxes, - sample_inputs_fn=sample_inputs_pad_bounding_boxes, - reference_fn=reference_pad_bounding_boxes, - reference_inputs_fn=reference_inputs_pad_bounding_boxes, + F.pad_bboxes, + sample_inputs_fn=sample_inputs_pad_bboxes, + reference_fn=reference_pad_bboxes, + reference_inputs_fn=reference_inputs_pad_bboxes, test_marks=[ xfail_jit_python_scalar_arg("padding"), ], @@ -655,19 +653,19 @@ def reference_inputs_perspective_image_tensor(): ) -def sample_inputs_perspective_bounding_boxes(): - for bounding_boxes_loader in make_bounding_box_loaders(): +def sample_inputs_perspective_bboxes(): + for bboxes_loader in make_bbox_loaders(): yield ArgsKwargs( - bounding_boxes_loader, - format=bounding_boxes_loader.format, - spatial_size=bounding_boxes_loader.spatial_size, + bboxes_loader, + format=bboxes_loader.format, + spatial_size=bboxes_loader.spatial_size, startpoints=None, endpoints=None, coefficients=_PERSPECTIVE_COEFFS[0], ) - format = datapoints.BoundingBoxFormat.XYXY - loader = make_bounding_box_loader(format=format) 
+ format = datapoints.BBoxFormat.XYXY + loader = make_bbox_loader(format=format) yield ArgsKwargs( loader, format=format, spatial_size=loader.spatial_size, startpoints=_STARTPOINTS, endpoints=_ENDPOINTS ) @@ -712,8 +710,8 @@ def sample_inputs_perspective_video(): test_marks=[xfail_jit_python_scalar_arg("fill")], ), KernelInfo( - F.perspective_bounding_boxes, - sample_inputs_fn=sample_inputs_perspective_bounding_boxes, + F.perspective_bboxes, + sample_inputs_fn=sample_inputs_perspective_bboxes, closeness_kwargs={ **scripted_vs_eager_float64_tolerances("cpu", atol=1e-6, rtol=1e-6), **scripted_vs_eager_float64_tolerances("cuda", atol=1e-6, rtol=1e-6), @@ -767,13 +765,13 @@ def reference_inputs_elastic_image_tensor(): yield ArgsKwargs(image_loader, interpolation=interpolation, displacement=displacement, fill=fill) -def sample_inputs_elastic_bounding_boxes(): - for bounding_boxes_loader in make_bounding_box_loaders(): - displacement = _get_elastic_displacement(bounding_boxes_loader.spatial_size) +def sample_inputs_elastic_bboxes(): + for bboxes_loader in make_bbox_loaders(): + displacement = _get_elastic_displacement(bboxes_loader.spatial_size) yield ArgsKwargs( - bounding_boxes_loader, - format=bounding_boxes_loader.format, - spatial_size=bounding_boxes_loader.spatial_size, + bboxes_loader, + format=bboxes_loader.format, + spatial_size=bboxes_loader.spatial_size, displacement=displacement, ) @@ -804,8 +802,8 @@ def sample_inputs_elastic_video(): test_marks=[xfail_jit_python_scalar_arg("fill")], ), KernelInfo( - F.elastic_bounding_boxes, - sample_inputs_fn=sample_inputs_elastic_bounding_boxes, + F.elastic_bboxes, + sample_inputs_fn=sample_inputs_elastic_bboxes, ), KernelInfo( F.elastic_mask, @@ -845,12 +843,12 @@ def reference_inputs_center_crop_image_tensor(): yield ArgsKwargs(image_loader, output_size=output_size) -def sample_inputs_center_crop_bounding_boxes(): - for bounding_boxes_loader, output_size in itertools.product(make_bounding_box_loaders(), _CENTER_CROP_OUTPUT_SIZES): +def sample_inputs_center_crop_bboxes(): + for bboxes_loader, output_size in itertools.product(make_bbox_loaders(), _CENTER_CROP_OUTPUT_SIZES): yield ArgsKwargs( - bounding_boxes_loader, - format=bounding_boxes_loader.format, - spatial_size=bounding_boxes_loader.spatial_size, + bboxes_loader, + format=bboxes_loader.format, + spatial_size=bboxes_loader.spatial_size, output_size=output_size, ) @@ -887,8 +885,8 @@ def sample_inputs_center_crop_video(): ], ), KernelInfo( - F.center_crop_bounding_boxes, - sample_inputs_fn=sample_inputs_center_crop_bounding_boxes, + F.center_crop_bboxes, + sample_inputs_fn=sample_inputs_center_crop_bboxes, test_marks=[ xfail_jit_python_scalar_arg("output_size"), ], @@ -1482,19 +1480,19 @@ def sample_inputs_adjust_saturation_video(): ) -def sample_inputs_clamp_bounding_boxes(): - for bounding_boxes_loader in make_bounding_box_loaders(): +def sample_inputs_clamp_bboxes(): + for bboxes_loader in make_bbox_loaders(): yield ArgsKwargs( - bounding_boxes_loader, - format=bounding_boxes_loader.format, - spatial_size=bounding_boxes_loader.spatial_size, + bboxes_loader, + format=bboxes_loader.format, + spatial_size=bboxes_loader.spatial_size, ) KERNEL_INFOS.append( KernelInfo( - F.clamp_bounding_boxes, - sample_inputs_fn=sample_inputs_clamp_bounding_boxes, + F.clamp_bboxes, + sample_inputs_fn=sample_inputs_clamp_bboxes, logs_usage=True, ) ) diff --git a/torchvision/datapoints/__init__.py b/torchvision/datapoints/__init__.py index fb51f0497ea..ea7f802d3c5 100644 --- 
a/torchvision/datapoints/__init__.py +++ b/torchvision/datapoints/__init__.py @@ -1,6 +1,6 @@ from torchvision import _BETA_TRANSFORMS_WARNING, _WARN_ABOUT_BETA_TRANSFORMS -from ._bounding_box import BoundingBoxes, BoundingBoxFormat +from ._bbox import BBoxes, BBoxFormat from ._datapoint import _FillType, _FillTypeJIT, _InputType, _InputTypeJIT from ._image import _ImageType, _ImageTypeJIT, _TensorImageType, _TensorImageTypeJIT, Image from ._mask import Mask diff --git a/torchvision/datapoints/_bounding_box.py b/torchvision/datapoints/_bounding_box.py index b3dc46348bc..c7300eb18b4 100644 --- a/torchvision/datapoints/_bounding_box.py +++ b/torchvision/datapoints/_bounding_box.py @@ -9,7 +9,7 @@ from ._datapoint import _FillTypeJIT, Datapoint -class BoundingBoxFormat(Enum): +class BBoxFormat(Enum): """[BETA] Coordinate format of a bounding box. Available formats are @@ -24,12 +24,12 @@ class BoundingBoxFormat(Enum): CXCYWH = "CXCYWH" -class BoundingBoxes(Datapoint): +class BBoxes(Datapoint): """[BETA] :class:`torch.Tensor` subclass for bounding boxes. Args: data: Any data that can be turned into a tensor with :func:`torch.as_tensor`. - format (BoundingBoxFormat, str): Format of the bounding box. + format (BBoxFormat, str): Format of the bounding box. spatial_size (two-tuple of ints): Height and width of the corresponding image or video. dtype (torch.dtype, optional): Desired data type of the bounding box. If omitted, will be inferred from ``data``. @@ -39,55 +39,55 @@ class BoundingBoxes(Datapoint): ``data`` is a :class:`torch.Tensor`, the value is taken from it. Otherwise, defaults to ``False``. """ - format: BoundingBoxFormat + format: BBoxFormat spatial_size: Tuple[int, int] @classmethod - def _wrap(cls, tensor: torch.Tensor, *, format: BoundingBoxFormat, spatial_size: Tuple[int, int]) -> BoundingBoxes: - bounding_boxes = tensor.as_subclass(cls) - bounding_boxes.format = format - bounding_boxes.spatial_size = spatial_size - return bounding_boxes + def _wrap(cls, tensor: torch.Tensor, *, format: BBoxFormat, spatial_size: Tuple[int, int]) -> BBoxes: + bboxes = tensor.as_subclass(cls) + bboxes.format = format + bboxes.spatial_size = spatial_size + return bboxes def __new__( cls, data: Any, *, - format: Union[BoundingBoxFormat, str], + format: Union[BBoxFormat, str], spatial_size: Tuple[int, int], dtype: Optional[torch.dtype] = None, device: Optional[Union[torch.device, str, int]] = None, requires_grad: Optional[bool] = None, - ) -> BoundingBoxes: + ) -> BBoxes: tensor = cls._to_tensor(data, dtype=dtype, device=device, requires_grad=requires_grad) if isinstance(format, str): - format = BoundingBoxFormat[format.upper()] + format = BBoxFormat[format.upper()] return cls._wrap(tensor, format=format, spatial_size=spatial_size) @classmethod def wrap_like( cls, - other: BoundingBoxes, + other: BBoxes, tensor: torch.Tensor, *, - format: Optional[BoundingBoxFormat] = None, + format: Optional[BBoxFormat] = None, spatial_size: Optional[Tuple[int, int]] = None, - ) -> BoundingBoxes: - """Wrap a :class:`torch.Tensor` as :class:`BoundingBoxes` from a reference. + ) -> BBoxes: + """Wrap a :class:`torch.Tensor` as :class:`BBoxes` from a reference. Args: - other (BoundingBoxes): Reference bounding box. - tensor (Tensor): Tensor to be wrapped as :class:`BoundingBoxes` - format (BoundingBoxFormat, str, optional): Format of the bounding box. If omitted, it is taken from the + other (BBoxes): Reference bounding box. 
+ tensor (Tensor): Tensor to be wrapped as :class:`BBoxes` + format (BBoxFormat, str, optional): Format of the bounding box. If omitted, it is taken from the reference. spatial_size (two-tuple of ints, optional): Height and width of the corresponding image or video. If omitted, it is taken from the reference. """ if isinstance(format, str): - format = BoundingBoxFormat[format.upper()] + format = BBoxFormat[format.upper()] return cls._wrap( tensor, @@ -98,17 +98,17 @@ def wrap_like( def __repr__(self, *, tensor_contents: Any = None) -> str: # type: ignore[override] return self._make_repr(format=self.format, spatial_size=self.spatial_size) - def horizontal_flip(self) -> BoundingBoxes: - output = self._F.horizontal_flip_bounding_boxes( + def horizontal_flip(self) -> BBoxes: + output = self._F.horizontal_flip_bboxes( self.as_subclass(torch.Tensor), format=self.format, spatial_size=self.spatial_size ) - return BoundingBoxes.wrap_like(self, output) + return BBoxes.wrap_like(self, output) - def vertical_flip(self) -> BoundingBoxes: - output = self._F.vertical_flip_bounding_boxes( + def vertical_flip(self) -> BBoxes: + output = self._F.vertical_flip_bboxes( self.as_subclass(torch.Tensor), format=self.format, spatial_size=self.spatial_size ) - return BoundingBoxes.wrap_like(self, output) + return BBoxes.wrap_like(self, output) def resize( # type: ignore[override] self, @@ -116,26 +116,26 @@ def resize( # type: ignore[override] interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR, max_size: Optional[int] = None, antialias: Optional[Union[str, bool]] = "warn", - ) -> BoundingBoxes: - output, spatial_size = self._F.resize_bounding_boxes( + ) -> BBoxes: + output, spatial_size = self._F.resize_bboxes( self.as_subclass(torch.Tensor), spatial_size=self.spatial_size, size=size, max_size=max_size, ) - return BoundingBoxes.wrap_like(self, output, spatial_size=spatial_size) + return BBoxes.wrap_like(self, output, spatial_size=spatial_size) - def crop(self, top: int, left: int, height: int, width: int) -> BoundingBoxes: - output, spatial_size = self._F.crop_bounding_boxes( + def crop(self, top: int, left: int, height: int, width: int) -> BBoxes: + output, spatial_size = self._F.crop_bboxes( self.as_subclass(torch.Tensor), self.format, top=top, left=left, height=height, width=width ) - return BoundingBoxes.wrap_like(self, output, spatial_size=spatial_size) + return BBoxes.wrap_like(self, output, spatial_size=spatial_size) - def center_crop(self, output_size: List[int]) -> BoundingBoxes: - output, spatial_size = self._F.center_crop_bounding_boxes( + def center_crop(self, output_size: List[int]) -> BBoxes: + output, spatial_size = self._F.center_crop_bboxes( self.as_subclass(torch.Tensor), format=self.format, spatial_size=self.spatial_size, output_size=output_size ) - return BoundingBoxes.wrap_like(self, output, spatial_size=spatial_size) + return BBoxes.wrap_like(self, output, spatial_size=spatial_size) def resized_crop( self, @@ -146,26 +146,26 @@ def resized_crop( size: List[int], interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR, antialias: Optional[Union[str, bool]] = "warn", - ) -> BoundingBoxes: - output, spatial_size = self._F.resized_crop_bounding_boxes( + ) -> BBoxes: + output, spatial_size = self._F.resized_crop_bboxes( self.as_subclass(torch.Tensor), self.format, top, left, height, width, size=size ) - return BoundingBoxes.wrap_like(self, output, spatial_size=spatial_size) + return BBoxes.wrap_like(self, output, spatial_size=spatial_size) def pad( self, 
padding: Union[int, Sequence[int]], fill: Optional[Union[int, float, List[float]]] = None, padding_mode: str = "constant", - ) -> BoundingBoxes: - output, spatial_size = self._F.pad_bounding_boxes( + ) -> BBoxes: + output, spatial_size = self._F.pad_bboxes( self.as_subclass(torch.Tensor), format=self.format, spatial_size=self.spatial_size, padding=padding, padding_mode=padding_mode, ) - return BoundingBoxes.wrap_like(self, output, spatial_size=spatial_size) + return BBoxes.wrap_like(self, output, spatial_size=spatial_size) def rotate( self, @@ -174,8 +174,8 @@ def rotate( expand: bool = False, center: Optional[List[float]] = None, fill: _FillTypeJIT = None, - ) -> BoundingBoxes: - output, spatial_size = self._F.rotate_bounding_boxes( + ) -> BBoxes: + output, spatial_size = self._F.rotate_bboxes( self.as_subclass(torch.Tensor), format=self.format, spatial_size=self.spatial_size, @@ -183,7 +183,7 @@ def rotate( expand=expand, center=center, ) - return BoundingBoxes.wrap_like(self, output, spatial_size=spatial_size) + return BBoxes.wrap_like(self, output, spatial_size=spatial_size) def affine( self, @@ -194,8 +194,8 @@ def affine( interpolation: Union[InterpolationMode, int] = InterpolationMode.NEAREST, fill: _FillTypeJIT = None, center: Optional[List[float]] = None, - ) -> BoundingBoxes: - output = self._F.affine_bounding_boxes( + ) -> BBoxes: + output = self._F.affine_bboxes( self.as_subclass(torch.Tensor), self.format, self.spatial_size, @@ -205,7 +205,7 @@ def affine( shear=shear, center=center, ) - return BoundingBoxes.wrap_like(self, output) + return BBoxes.wrap_like(self, output) def perspective( self, @@ -214,8 +214,8 @@ def perspective( interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR, fill: _FillTypeJIT = None, coefficients: Optional[List[float]] = None, - ) -> BoundingBoxes: - output = self._F.perspective_bounding_boxes( + ) -> BBoxes: + output = self._F.perspective_bboxes( self.as_subclass(torch.Tensor), format=self.format, spatial_size=self.spatial_size, @@ -223,15 +223,15 @@ def perspective( endpoints=endpoints, coefficients=coefficients, ) - return BoundingBoxes.wrap_like(self, output) + return BBoxes.wrap_like(self, output) def elastic( self, displacement: torch.Tensor, interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR, fill: _FillTypeJIT = None, - ) -> BoundingBoxes: - output = self._F.elastic_bounding_boxes( + ) -> BBoxes: + output = self._F.elastic_bboxes( self.as_subclass(torch.Tensor), self.format, self.spatial_size, displacement=displacement ) - return BoundingBoxes.wrap_like(self, output) + return BBoxes.wrap_like(self, output) diff --git a/torchvision/datapoints/_datapoint.py b/torchvision/datapoints/_datapoint.py index 35072159d7f..a813159064f 100644 --- a/torchvision/datapoints/_datapoint.py +++ b/torchvision/datapoints/_datapoint.py @@ -138,8 +138,8 @@ def __deepcopy__(self: D, memo: Dict[int, Any]) -> D: # *not* happen for `deepcopy(Tensor)`. A side-effect from detaching is that the `Tensor.requires_grad` # attribute is cleared, so we need to refill it before we return. # Note: We don't explicitly handle deep-copying of the metadata here. The only metadata we currently have is - # `BoundingBoxes.format` and `BoundingBoxes.spatial_size`, which are immutable and thus implicitly deep-copied by - # `BoundingBoxes.clone()`. + # `BBoxes.format` and `BBoxes.spatial_size`, which are immutable and thus implicitly deep-copied by + # `BBoxes.clone()`. 
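As a quick illustration of the metadata behaviour described in the comment above, here is a minimal sketch, assuming the renamed datapoints.BBoxes API introduced by this patch:

import copy
from torchvision import datapoints

# A single box in XYXY format on a 32x32 image, using the names introduced by this patch.
boxes = datapoints.BBoxes([[0, 0, 10, 10]], format="XYXY", spatial_size=(32, 32))

# deepcopy() goes through detach().clone(); per the comment above, the immutable
# metadata (format and spatial_size) is expected to carry over to the copy.
copied = copy.deepcopy(boxes)
print(copied.format, copied.spatial_size)  # expected: BBoxFormat.XYXY (32, 32)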
return self.detach().clone().requires_grad_(self.requires_grad) # type: ignore[return-value] def horizontal_flip(self) -> Datapoint: diff --git a/torchvision/datapoints/_dataset_wrapper.py b/torchvision/datapoints/_dataset_wrapper.py index 3b162b69cbf..95b3aefae94 100644 --- a/torchvision/datapoints/_dataset_wrapper.py +++ b/torchvision/datapoints/_dataset_wrapper.py @@ -44,7 +44,7 @@ def wrap_dataset_for_transforms_v2(dataset, target_keys=None): the target and wrap the data in the corresponding ``torchvision.datapoints``. The original keys are preserved. If ``target_keys`` is ommitted, returns only the values for the ``"boxes"`` and ``"labels"``. * :class:`~torchvision.datasets.CelebA`: The target for ``target_type="bbox"`` is converted to the ``XYXY`` - coordinate format and wrapped into a :class:`~torchvision.datapoints.BoundingBoxes` datapoint. + coordinate format and wrapped into a :class:`~torchvision.datapoints.BBoxes` datapoint. * :class:`~torchvision.datasets.Kitti`: Instead returning the target as list of dicts, the wrapper returns a dict of lists. In addition, the key-value-pairs ``"boxes"`` and ``"labels"`` are added and wrap the data in the corresponding ``torchvision.datapoints``. The original keys are preserved. If ``target_keys`` is @@ -56,7 +56,7 @@ def wrap_dataset_for_transforms_v2(dataset, target_keys=None): a dictionary with the key-value-pairs ``"masks"`` (as :class:`~torchvision.datapoints.Mask` datapoint) and ``"labels"``. * :class:`~torchvision.datasets.WIDERFace`: The value for key ``"bbox"`` in the target is converted to ``XYXY`` - coordinate format and wrapped into a :class:`~torchvision.datapoints.BoundingBoxes` datapoint. + coordinate format and wrapped into a :class:`~torchvision.datapoints.BBoxes` datapoint. Image classification datasets @@ -360,13 +360,13 @@ def wrapper(idx, sample): target["image_id"] = image_id if "boxes" in target_keys: - target["boxes"] = F.convert_format_bounding_boxes( - datapoints.BoundingBoxes( + target["boxes"] = F.convert_format_bboxes( + datapoints.BBoxes( batched_target["bbox"], - format=datapoints.BoundingBoxFormat.XYWH, + format=datapoints.BBoxFormat.XYWH, spatial_size=spatial_size, ), - new_format=datapoints.BoundingBoxFormat.XYXY, + new_format=datapoints.BBoxFormat.XYXY, ) if "masks" in target_keys: @@ -442,12 +442,12 @@ def wrapper(idx, sample): target = {} if "boxes" in target_keys: - target["boxes"] = datapoints.BoundingBoxes( + target["boxes"] = datapoints.BBoxes( [ [int(bndbox[part]) for part in ("xmin", "ymin", "xmax", "ymax")] for bndbox in batched_instances["bndbox"] ], - format=datapoints.BoundingBoxFormat.XYXY, + format=datapoints.BBoxFormat.XYXY, spatial_size=(image.height, image.width), ) @@ -481,13 +481,13 @@ def wrapper(idx, sample): target, target_types=dataset.target_type, type_wrappers={ - "bbox": lambda item: F.convert_format_bounding_boxes( - datapoints.BoundingBoxes( + "bbox": lambda item: F.convert_format_bboxes( + datapoints.BBoxes( item, - format=datapoints.BoundingBoxFormat.XYWH, + format=datapoints.BBoxFormat.XYWH, spatial_size=(image.height, image.width), ), - new_format=datapoints.BoundingBoxFormat.XYXY, + new_format=datapoints.BBoxFormat.XYXY, ), }, ) @@ -532,9 +532,9 @@ def wrapper(idx, sample): target = {} if "boxes" in target_keys: - target["boxes"] = datapoints.BoundingBoxes( + target["boxes"] = datapoints.BBoxes( batched_target["bbox"], - format=datapoints.BoundingBoxFormat.XYXY, + format=datapoints.BBoxFormat.XYXY, spatial_size=(image.height, image.width), ) @@ -628,11 +628,11 @@ def 
wrapper(idx, sample): target = {key: target[key] for key in target_keys} if "bbox" in target_keys: - target["bbox"] = F.convert_format_bounding_boxes( - datapoints.BoundingBoxes( - target["bbox"], format=datapoints.BoundingBoxFormat.XYWH, spatial_size=(image.height, image.width) + target["bbox"] = F.convert_format_bboxes( + datapoints.BBoxes( + target["bbox"], format=datapoints.BBoxFormat.XYWH, spatial_size=(image.height, image.width) ), - new_format=datapoints.BoundingBoxFormat.XYXY, + new_format=datapoints.BBoxFormat.XYXY, ) return image, target diff --git a/torchvision/ops/boxes.py b/torchvision/ops/boxes.py index a541f8d880a..63687a13457 100644 --- a/torchvision/ops/boxes.py +++ b/torchvision/ops/boxes.py @@ -404,14 +404,14 @@ def masks_to_boxes(masks: torch.Tensor) -> torch.Tensor: n = masks.shape[0] - bounding_boxes = torch.zeros((n, 4), device=masks.device, dtype=torch.float) + bboxes = torch.zeros((n, 4), device=masks.device, dtype=torch.float) for index, mask in enumerate(masks): y, x = torch.where(mask != 0) - bounding_boxes[index, 0] = torch.min(x) - bounding_boxes[index, 1] = torch.min(y) - bounding_boxes[index, 2] = torch.max(x) - bounding_boxes[index, 3] = torch.max(y) + bboxes[index, 0] = torch.min(x) + bboxes[index, 1] = torch.min(y) + bboxes[index, 2] = torch.max(x) + bboxes[index, 3] = torch.max(y) - return bounding_boxes + return bboxes diff --git a/torchvision/prototype/datasets/_builtin/caltech.py b/torchvision/prototype/datasets/_builtin/caltech.py index 631de46b2b6..f5080d80c01 100644 --- a/torchvision/prototype/datasets/_builtin/caltech.py +++ b/torchvision/prototype/datasets/_builtin/caltech.py @@ -6,7 +6,7 @@ import torch from torchdata.datapipes.iter import Filter, IterDataPipe, IterKeyZipper, Mapper -from torchvision.datapoints import BoundingBoxes +from torchvision.datapoints import BBoxes from torchvision.prototype.datapoints import Label from torchvision.prototype.datasets.utils import Dataset, EncodedImage, GDriveResource, OnlineResource from torchvision.prototype.datasets.utils._internal import ( @@ -112,7 +112,7 @@ def _prepare_sample( image_path=image_path, image=image, ann_path=ann_path, - bounding_boxes=BoundingBoxes( + bboxes=BBoxes( ann["box_coord"].astype(np.int64).squeeze()[[2, 0, 3, 1]], format="xyxy", spatial_size=image.spatial_size, diff --git a/torchvision/prototype/datasets/_builtin/celeba.py b/torchvision/prototype/datasets/_builtin/celeba.py index 9112a80357c..c4109b8c7ff 100644 --- a/torchvision/prototype/datasets/_builtin/celeba.py +++ b/torchvision/prototype/datasets/_builtin/celeba.py @@ -4,7 +4,7 @@ import torch from torchdata.datapipes.iter import Filter, IterDataPipe, IterKeyZipper, Mapper, Zipper -from torchvision.datapoints import BoundingBoxes +from torchvision.datapoints import BBoxes from torchvision.prototype.datapoints import Label from torchvision.prototype.datasets.utils import Dataset, EncodedImage, GDriveResource, OnlineResource from torchvision.prototype.datasets.utils._internal import ( @@ -100,7 +100,7 @@ def _resources(self) -> List[OnlineResource]: sha256="f0e5da289d5ccf75ffe8811132694922b60f2af59256ed362afa03fefba324d0", file_name="list_attr_celeba.txt", ) - bounding_boxes = GDriveResource( + bboxes = GDriveResource( "0B7EVK8r0v71pbThiMVRxWXZ4dU0", sha256="7487a82e57c4bb956c5445ae2df4a91ffa717e903c5fa22874ede0820c8ec41b", file_name="list_bbox_celeba.txt", @@ -110,7 +110,7 @@ def _resources(self) -> List[OnlineResource]: sha256="6c02a87569907f6db2ba99019085697596730e8129f67a3d61659f198c48d43b", 
file_name="list_landmarks_align_celeba.txt", ) - return [splits, images, identities, attributes, bounding_boxes, landmarks] + return [splits, images, identities, attributes, bboxes, landmarks] def _filter_split(self, data: Tuple[str, Dict[str, str]]) -> bool: split_id = { @@ -137,15 +137,15 @@ def _prepare_sample( path, buffer = image_data image = EncodedImage.from_file(buffer) - (_, identity), (_, attributes), (_, bounding_boxes), (_, landmarks) = ann_data + (_, identity), (_, attributes), (_, bboxes), (_, landmarks) = ann_data return dict( path=path, image=image, identity=Label(int(identity["identity"])), attributes={attr: value == "1" for attr, value in attributes.items()}, - bounding_boxes=BoundingBoxes( - [int(bounding_boxes[key]) for key in ("x_1", "y_1", "width", "height")], + bboxes=BBoxes( + [int(bboxes[key]) for key in ("x_1", "y_1", "width", "height")], format="xywh", spatial_size=image.spatial_size, ), @@ -156,7 +156,7 @@ def _prepare_sample( ) def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]: - splits_dp, images_dp, identities_dp, attributes_dp, bounding_boxes_dp, landmarks_dp = resource_dps + splits_dp, images_dp, identities_dp, attributes_dp, bboxes_dp, landmarks_dp = resource_dps splits_dp = CelebACSVParser(splits_dp, fieldnames=("image_id", "split_id")) splits_dp = Filter(splits_dp, self._filter_split) @@ -169,7 +169,7 @@ def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, for dp, fieldnames in ( (identities_dp, ("image_id", "identity")), (attributes_dp, None), - (bounding_boxes_dp, None), + (bboxes_dp, None), (landmarks_dp, None), ) ] diff --git a/torchvision/prototype/datasets/_builtin/coco.py b/torchvision/prototype/datasets/_builtin/coco.py index abf19acec0d..0c7da1a5dc0 100644 --- a/torchvision/prototype/datasets/_builtin/coco.py +++ b/torchvision/prototype/datasets/_builtin/coco.py @@ -14,7 +14,7 @@ Mapper, UnBatcher, ) -from torchvision.datapoints import BoundingBoxes, Mask +from torchvision.datapoints import BBoxes, Mask from torchvision.prototype.datapoints import Label from torchvision.prototype.datasets.utils import Dataset, EncodedImage, HttpResource, OnlineResource from torchvision.prototype.datasets.utils._internal import ( @@ -126,7 +126,7 @@ def _decode_instances_anns(self, anns: List[Dict[str, Any]], image_meta: Dict[st ), areas=torch.as_tensor([ann["area"] for ann in anns]), crowds=torch.as_tensor([ann["iscrowd"] for ann in anns], dtype=torch.bool), - bounding_boxes=BoundingBoxes( + bboxes=BBoxes( [ann["bbox"] for ann in anns], format="xywh", spatial_size=spatial_size, diff --git a/torchvision/prototype/datasets/_builtin/cub200.py b/torchvision/prototype/datasets/_builtin/cub200.py index b301c6ba030..11218859470 100644 --- a/torchvision/prototype/datasets/_builtin/cub200.py +++ b/torchvision/prototype/datasets/_builtin/cub200.py @@ -15,7 +15,7 @@ Mapper, ) from torchdata.datapipes.map import IterToMapConverter -from torchvision.datapoints import BoundingBoxes +from torchvision.datapoints import BBoxes from torchvision.prototype.datapoints import Label from torchvision.prototype.datasets.utils import Dataset, EncodedImage, GDriveResource, OnlineResource from torchvision.prototype.datasets.utils._internal import ( @@ -112,7 +112,7 @@ def _2011_classify_archive(self, data: Tuple[str, Any]) -> Optional[int]: return 1 elif path.name == "images.txt": return 2 - elif path.name == "bounding_boxes.txt": + elif path.name == "bboxes.txt": return 3 else: return None @@ -134,12 +134,10 @@ def 
_2011_segmentation_key(self, data: Tuple[str, Any]) -> str: def _2011_prepare_ann( self, data: Tuple[str, Tuple[List[str], Tuple[str, BinaryIO]]], spatial_size: Tuple[int, int] ) -> Dict[str, Any]: - _, (bounding_boxes_data, segmentation_data) = data + _, (bboxes_data, segmentation_data) = data segmentation_path, segmentation_buffer = segmentation_data return dict( - bounding_boxes=BoundingBoxes( - [float(part) for part in bounding_boxes_data[1:]], format="xywh", spatial_size=spatial_size - ), + bboxes=BBoxes([float(part) for part in bboxes_data[1:]], format="xywh", spatial_size=spatial_size), segmentation_path=segmentation_path, segmentation=EncodedImage.from_file(segmentation_buffer), ) @@ -158,7 +156,7 @@ def _2010_prepare_ann( content = read_mat(buffer) return dict( ann_path=path, - bounding_boxes=BoundingBoxes( + bboxes=BBoxes( [int(content["bbox"][coord]) for coord in ("left", "bottom", "right", "top")], format="xyxy", spatial_size=spatial_size, @@ -191,7 +189,7 @@ def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, prepare_ann_fn: Callable if self._year == "2011": archive_dp, segmentations_dp = resource_dps - images_dp, split_dp, image_files_dp, bounding_boxes_dp = Demultiplexer( + images_dp, split_dp, image_files_dp, bboxes_dp = Demultiplexer( archive_dp, 4, self._2011_classify_archive, drop_none=True, buffer_size=INFINITE_BUFFER_SIZE ) @@ -204,11 +202,11 @@ def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, split_dp = Mapper(split_dp, getitem(0)) split_dp = Mapper(split_dp, image_files_map.__getitem__) - bounding_boxes_dp = CSVParser(bounding_boxes_dp, dialect="cub200") - bounding_boxes_dp = Mapper(bounding_boxes_dp, image_files_map.__getitem__, input_col=0) + bboxes_dp = CSVParser(bboxes_dp, dialect="cub200") + bboxes_dp = Mapper(bboxes_dp, image_files_map.__getitem__, input_col=0) anns_dp = IterKeyZipper( - bounding_boxes_dp, + bboxes_dp, segmentations_dp, key_fn=getitem(0), ref_key_fn=self._2011_segmentation_key, diff --git a/torchvision/prototype/datasets/_builtin/gtsrb.py b/torchvision/prototype/datasets/_builtin/gtsrb.py index 34651fcfce3..e0c5cc3f91f 100644 --- a/torchvision/prototype/datasets/_builtin/gtsrb.py +++ b/torchvision/prototype/datasets/_builtin/gtsrb.py @@ -2,7 +2,7 @@ from typing import Any, Dict, List, Optional, Tuple, Union from torchdata.datapipes.iter import CSVDictParser, Demultiplexer, Filter, IterDataPipe, Mapper, Zipper -from torchvision.datapoints import BoundingBoxes +from torchvision.datapoints import BBoxes from torchvision.prototype.datapoints import Label from torchvision.prototype.datasets.utils import Dataset, EncodedImage, HttpResource, OnlineResource from torchvision.prototype.datasets.utils._internal import ( @@ -76,7 +76,7 @@ def _prepare_sample(self, data: Tuple[Tuple[str, Any], Dict[str, Any]]) -> Dict[ (path, buffer), csv_info = data label = int(csv_info["ClassId"]) - bounding_boxes = BoundingBoxes( + bboxes = BBoxes( [int(csv_info[k]) for k in ("Roi.X1", "Roi.Y1", "Roi.X2", "Roi.Y2")], format="xyxy", spatial_size=(int(csv_info["Height"]), int(csv_info["Width"])), @@ -86,7 +86,7 @@ def _prepare_sample(self, data: Tuple[Tuple[str, Any], Dict[str, Any]]) -> Dict[ "path": path, "image": EncodedImage.from_file(buffer), "label": Label(label, categories=self._categories), - "bounding_boxes": bounding_boxes, + "bboxes": bboxes, } def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]: diff --git a/torchvision/prototype/datasets/_builtin/stanford_cars.py 
b/torchvision/prototype/datasets/_builtin/stanford_cars.py index aefbbede2e3..11ed5b7117b 100644 --- a/torchvision/prototype/datasets/_builtin/stanford_cars.py +++ b/torchvision/prototype/datasets/_builtin/stanford_cars.py @@ -2,7 +2,7 @@ from typing import Any, BinaryIO, Dict, Iterator, List, Tuple, Union from torchdata.datapipes.iter import Filter, IterDataPipe, Mapper, Zipper -from torchvision.datapoints import BoundingBoxes +from torchvision.datapoints import BBoxes from torchvision.prototype.datapoints import Label from torchvision.prototype.datasets.utils import Dataset, EncodedImage, HttpResource, OnlineResource from torchvision.prototype.datasets.utils._internal import ( @@ -90,7 +90,7 @@ def _prepare_sample(self, data: Tuple[Tuple[str, BinaryIO], Tuple[int, int, int, path=path, image=image, label=Label(target[4] - 1, categories=self._categories), - bounding_boxes=BoundingBoxes(target[:4], format="xyxy", spatial_size=image.spatial_size), + bboxes=BBoxes(target[:4], format="xyxy", spatial_size=image.spatial_size), ) def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]: diff --git a/torchvision/prototype/datasets/_builtin/voc.py b/torchvision/prototype/datasets/_builtin/voc.py index 53dfbd185bc..782d947925f 100644 --- a/torchvision/prototype/datasets/_builtin/voc.py +++ b/torchvision/prototype/datasets/_builtin/voc.py @@ -5,7 +5,7 @@ from xml.etree import ElementTree from torchdata.datapipes.iter import Demultiplexer, Filter, IterDataPipe, IterKeyZipper, LineReader, Mapper -from torchvision.datapoints import BoundingBoxes +from torchvision.datapoints import BBoxes from torchvision.datasets import VOCDetection from torchvision.prototype.datapoints import Label from torchvision.prototype.datasets.utils import Dataset, EncodedImage, HttpResource, OnlineResource @@ -103,7 +103,7 @@ def _prepare_detection_ann(self, buffer: BinaryIO) -> Dict[str, Any]: anns = self._parse_detection_ann(buffer) instances = anns["object"] return dict( - bounding_boxes=BoundingBoxes( + bboxes=BBoxes( [ [int(instance["bndbox"][part]) for part in ("xmin", "ymin", "xmax", "ymax")] for instance in instances diff --git a/torchvision/prototype/transforms/_augment.py b/torchvision/prototype/transforms/_augment.py index 0e50fb75588..ce5a9d703cc 100644 --- a/torchvision/prototype/transforms/_augment.py +++ b/torchvision/prototype/transforms/_augment.py @@ -26,7 +26,7 @@ def _check_inputs(self, flat_inputs: List[Any]) -> None: and has_any(flat_inputs, proto_datapoints.OneHotLabel) ): raise TypeError(f"{type(self).__name__}() is only defined for tensor images/videos and one-hot labels.") - if has_any(flat_inputs, PIL.Image.Image, datapoints.BoundingBoxes, datapoints.Mask, proto_datapoints.Label): + if has_any(flat_inputs, PIL.Image.Image, datapoints.BBoxes, datapoints.Mask, proto_datapoints.Label): raise TypeError( f"{type(self).__name__}() does not support PIL images, bounding boxes, masks and plain labels." 
) @@ -175,8 +175,8 @@ def _copy_paste( # There is a similar +1 in other reference implementations: # https://github.com/pytorch/vision/blob/b6feccbc4387766b76a3e22b13815dbbbfa87c0f/torchvision/models/detection/roi_heads.py#L418-L422 xyxy_boxes[:, 2:] += 1 - boxes = F.convert_format_bounding_boxes( - xyxy_boxes, old_format=datapoints.BoundingBoxFormat.XYXY, new_format=bbox_format, inplace=True + boxes = F.convert_format_bboxes( + xyxy_boxes, old_format=datapoints.BBoxFormat.XYXY, new_format=bbox_format, inplace=True ) out_target["boxes"] = torch.cat([boxes, paste_boxes]) @@ -184,8 +184,8 @@ def _copy_paste( out_target["labels"] = torch.cat([labels, paste_labels]) # Check for degenerated boxes and remove them - boxes = F.convert_format_bounding_boxes( - out_target["boxes"], old_format=bbox_format, new_format=datapoints.BoundingBoxFormat.XYXY + boxes = F.convert_format_bboxes( + out_target["boxes"], old_format=bbox_format, new_format=datapoints.BBoxFormat.XYXY ) degenerate_boxes = boxes[:, 2:] <= boxes[:, :2] if degenerate_boxes.any(): @@ -201,14 +201,14 @@ def _extract_image_targets( self, flat_sample: List[Any] ) -> Tuple[List[datapoints._TensorImageType], List[Dict[str, Any]]]: # fetch all images, bboxes, masks and labels from unstructured input - # with List[image], List[BoundingBoxes], List[Mask], List[Label] + # with List[image], List[BBoxes], List[Mask], List[Label] images, bboxes, masks, labels = [], [], [], [] for obj in flat_sample: if isinstance(obj, datapoints.Image) or is_simple_tensor(obj): images.append(obj) elif isinstance(obj, PIL.Image.Image): images.append(F.to_image_tensor(obj)) - elif isinstance(obj, datapoints.BoundingBoxes): + elif isinstance(obj, datapoints.BBoxes): bboxes.append(obj) elif isinstance(obj, datapoints.Mask): masks.append(obj) @@ -218,7 +218,7 @@ def _extract_image_targets( if not (len(images) == len(bboxes) == len(masks) == len(labels)): raise TypeError( f"{type(self).__name__}() requires input sample to contain equal sized list of Images, " - "BoundingBoxeses, Masks and Labels or OneHotLabels." + "BBoxeses, Masks and Labels or OneHotLabels." ) targets = [] @@ -244,8 +244,8 @@ def _insert_outputs( elif is_simple_tensor(obj): flat_sample[i] = output_images[c0] c0 += 1 - elif isinstance(obj, datapoints.BoundingBoxes): - flat_sample[i] = datapoints.BoundingBoxes.wrap_like(obj, output_targets[c1]["boxes"]) + elif isinstance(obj, datapoints.BBoxes): + flat_sample[i] = datapoints.BBoxes.wrap_like(obj, output_targets[c1]["boxes"]) c1 += 1 elif isinstance(obj, datapoints.Mask): flat_sample[i] = datapoints.Mask.wrap_like(obj, output_targets[c2]["masks"]) diff --git a/torchvision/prototype/transforms/_geometry.py b/torchvision/prototype/transforms/_geometry.py index b328c132070..98b531a35c9 100644 --- a/torchvision/prototype/transforms/_geometry.py +++ b/torchvision/prototype/transforms/_geometry.py @@ -7,7 +7,7 @@ from torchvision.prototype.datapoints import Label, OneHotLabel from torchvision.transforms.v2 import functional as F, Transform from torchvision.transforms.v2._utils import _setup_fill_arg, _setup_size -from torchvision.transforms.v2.utils import has_any, is_simple_tensor, query_bounding_boxes, query_spatial_size +from torchvision.transforms.v2.utils import has_any, is_simple_tensor, query_bboxes, query_spatial_size class FixedSizeCrop(Transform): @@ -39,9 +39,9 @@ def _check_inputs(self, flat_inputs: List[Any]) -> None: f"{type(self).__name__}() requires input sample to contain an tensor or PIL image or a Video." 
) - if has_any(flat_inputs, datapoints.BoundingBoxes) and not has_any(flat_inputs, Label, OneHotLabel): + if has_any(flat_inputs, datapoints.BBoxes) and not has_any(flat_inputs, Label, OneHotLabel): raise TypeError( - f"If a BoundingBoxes is contained in the input sample, " + f"If a BBoxes is contained in the input sample, " f"{type(self).__name__}() also requires it to contain a Label or OneHotLabel." ) @@ -59,25 +59,25 @@ def _get_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: top = int(offset_height * r) left = int(offset_width * r) - bounding_boxes: Optional[torch.Tensor] + bboxes: Optional[torch.Tensor] try: - bounding_boxes = query_bounding_boxes(flat_inputs) + bboxes = query_bboxes(flat_inputs) except ValueError: - bounding_boxes = None + bboxes = None - if needs_crop and bounding_boxes is not None: - format = bounding_boxes.format - bounding_boxes, spatial_size = F.crop_bounding_boxes( - bounding_boxes.as_subclass(torch.Tensor), + if needs_crop and bboxes is not None: + format = bboxes.format + bboxes, spatial_size = F.crop_bboxes( + bboxes.as_subclass(torch.Tensor), format=format, top=top, left=left, height=new_height, width=new_width, ) - bounding_boxes = F.clamp_bounding_boxes(bounding_boxes, format=format, spatial_size=spatial_size) - height_and_width = F.convert_format_bounding_boxes( - bounding_boxes, old_format=format, new_format=datapoints.BoundingBoxFormat.XYWH + bboxes = F.clamp_bboxes(bboxes, format=format, spatial_size=spatial_size) + height_and_width = F.convert_format_bboxes( + bboxes, old_format=format, new_format=datapoints.BBoxFormat.XYWH )[..., 2:] is_valid = torch.all(height_and_width > 0, dim=-1) else: @@ -112,12 +112,10 @@ def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: if params["is_valid"] is not None: if isinstance(inpt, (Label, OneHotLabel, datapoints.Mask)): inpt = inpt.wrap_like(inpt, inpt[params["is_valid"]]) # type: ignore[arg-type] - elif isinstance(inpt, datapoints.BoundingBoxes): - inpt = datapoints.BoundingBoxes.wrap_like( + elif isinstance(inpt, datapoints.BBoxes): + inpt = datapoints.BBoxes.wrap_like( inpt, - F.clamp_bounding_boxes( - inpt[params["is_valid"]], format=inpt.format, spatial_size=inpt.spatial_size - ), + F.clamp_bboxes(inpt[params["is_valid"]], format=inpt.format, spatial_size=inpt.spatial_size), ) if params["needs_pad"]: diff --git a/torchvision/transforms/v2/__init__.py b/torchvision/transforms/v2/__init__.py index b44f479c4b4..5840fcc4a2b 100644 --- a/torchvision/transforms/v2/__init__.py +++ b/torchvision/transforms/v2/__init__.py @@ -39,7 +39,7 @@ ScaleJitter, TenCrop, ) -from ._meta import ClampBoundingBoxes, ConvertBoundingBoxFormat +from ._meta import ClampBBoxes, ConvertBBoxFormat from ._misc import ( ConvertImageDtype, GaussianBlur, @@ -47,7 +47,7 @@ Lambda, LinearTransformation, Normalize, - SanitizeBoundingBoxes, + SanitizeBBoxes, ToDtype, ) from ._temporal import UniformTemporalSubsample diff --git a/torchvision/transforms/v2/_augment.py b/torchvision/transforms/v2/_augment.py index f9038c6af32..353214766e9 100644 --- a/torchvision/transforms/v2/_augment.py +++ b/torchvision/transforms/v2/_augment.py @@ -155,7 +155,7 @@ def forward(self, *inputs): flat_inputs, spec = tree_flatten(inputs) needs_transform_list = self._needs_transform_list(flat_inputs) - if has_any(flat_inputs, PIL.Image.Image, datapoints.BoundingBoxes, datapoints.Mask): + if has_any(flat_inputs, PIL.Image.Image, datapoints.BBoxes, datapoints.Mask): raise ValueError(f"{type(self).__name__}() does not support PIL images, bounding 
boxes and masks.") labels = self._labels_getter(inputs) diff --git a/torchvision/transforms/v2/_auto_augment.py b/torchvision/transforms/v2/_auto_augment.py index 785e1f6970b..48465b4b7c8 100644 --- a/torchvision/transforms/v2/_auto_augment.py +++ b/torchvision/transforms/v2/_auto_augment.py @@ -34,7 +34,7 @@ def _get_random_item(self, dct: Dict[str, Tuple[Callable, bool]]) -> Tuple[str, def _flatten_and_extract_image_or_video( self, inputs: Any, - unsupported_types: Tuple[Type, ...] = (datapoints.BoundingBoxes, datapoints.Mask), + unsupported_types: Tuple[Type, ...] = (datapoints.BBoxes, datapoints.Mask), ) -> Tuple[Tuple[List[Any], TreeSpec, int], Union[datapoints._ImageType, datapoints._VideoType]]: flat_inputs, spec = tree_flatten(inputs if len(inputs) > 1 else inputs[0]) needs_transform_list = self._needs_transform_list(flat_inputs) diff --git a/torchvision/transforms/v2/_geometry.py b/torchvision/transforms/v2/_geometry.py index a64f7a40e4b..b9dc68b625e 100644 --- a/torchvision/transforms/v2/_geometry.py +++ b/torchvision/transforms/v2/_geometry.py @@ -22,7 +22,7 @@ _setup_float_or_seq, _setup_size, ) -from .utils import has_all, has_any, is_simple_tensor, query_bounding_boxes, query_spatial_size +from .utils import has_all, has_any, is_simple_tensor, query_bboxes, query_spatial_size class RandomHorizontalFlip(_RandomApplyTransform): @@ -31,7 +31,7 @@ class RandomHorizontalFlip(_RandomApplyTransform): .. v2betastatus:: RandomHorizontalFlip transform If the input is a :class:`torch.Tensor` or a ``Datapoint`` (e.g. :class:`~torchvision.datapoints.Image`, - :class:`~torchvision.datapoints.Video`, :class:`~torchvision.datapoints.BoundingBoxes` etc.) + :class:`~torchvision.datapoints.Video`, :class:`~torchvision.datapoints.BBoxes` etc.) it can have arbitrary number of leading batch dimensions. For example, the image can have ``[..., C, H, W]`` shape. A bounding box can have ``[..., 4]`` shape. @@ -51,7 +51,7 @@ class RandomVerticalFlip(_RandomApplyTransform): .. v2betastatus:: RandomVerticalFlip transform If the input is a :class:`torch.Tensor` or a ``Datapoint`` (e.g. :class:`~torchvision.datapoints.Image`, - :class:`~torchvision.datapoints.Video`, :class:`~torchvision.datapoints.BoundingBoxes` etc.) + :class:`~torchvision.datapoints.Video`, :class:`~torchvision.datapoints.BBoxes` etc.) it can have arbitrary number of leading batch dimensions. For example, the image can have ``[..., C, H, W]`` shape. A bounding box can have ``[..., 4]`` shape. @@ -71,7 +71,7 @@ class Resize(Transform): .. v2betastatus:: Resize transform If the input is a :class:`torch.Tensor` or a ``Datapoint`` (e.g. :class:`~torchvision.datapoints.Image`, - :class:`~torchvision.datapoints.Video`, :class:`~torchvision.datapoints.BoundingBoxes` etc.) + :class:`~torchvision.datapoints.Video`, :class:`~torchvision.datapoints.BBoxes` etc.) it can have arbitrary number of leading batch dimensions. For example, the image can have ``[..., C, H, W]`` shape. A bounding box can have ``[..., 4]`` shape. @@ -165,7 +165,7 @@ class CenterCrop(Transform): .. v2betastatus:: CenterCrop transform If the input is a :class:`torch.Tensor` or a ``Datapoint`` (e.g. :class:`~torchvision.datapoints.Image`, - :class:`~torchvision.datapoints.Video`, :class:`~torchvision.datapoints.BoundingBoxes` etc.) + :class:`~torchvision.datapoints.Video`, :class:`~torchvision.datapoints.BBoxes` etc.) it can have arbitrary number of leading batch dimensions. For example, the image can have ``[..., C, H, W]`` shape. A bounding box can have ``[..., 4]`` shape. 
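The docstrings touched above all advertise the same contract: the v2 transforms dispatch on the datapoint type, so an image and its boxes can be transformed jointly in one call. A small sketch of that usage, assuming the renamed BBoxes/BBoxFormat names from this patch; shapes and values are illustrative.

.. code:: python

    import torch
    from torchvision import datapoints
    from torchvision.transforms import v2

    img = datapoints.Image(torch.randint(0, 256, (3, 32, 32), dtype=torch.uint8))
    boxes = datapoints.BBoxes(
        [[2, 2, 10, 12], [5, 5, 20, 20]],
        format=datapoints.BBoxFormat.XYXY,
        spatial_size=(32, 32),
    )

    transform = v2.Compose([
        v2.RandomHorizontalFlip(p=1.0),
        v2.Resize(size=(64, 64), antialias=True),
    ])

    # Both inputs are transformed consistently: the image is flipped and resized,
    # and the box coordinates plus their spatial_size metadata follow along.
    out_img, out_boxes = transform(img, boxes)
    print(out_img.shape, out_boxes.spatial_size)  # torch.Size([3, 64, 64]) (64, 64)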
@@ -193,7 +193,7 @@ class RandomResizedCrop(Transform): .. v2betastatus:: RandomResizedCrop transform If the input is a :class:`torch.Tensor` or a ``Datapoint`` (e.g. :class:`~torchvision.datapoints.Image`, - :class:`~torchvision.datapoints.Video`, :class:`~torchvision.datapoints.BoundingBoxes` etc.) + :class:`~torchvision.datapoints.Video`, :class:`~torchvision.datapoints.BBoxes` etc.) it can have arbitrary number of leading batch dimensions. For example, the image can have ``[..., C, H, W]`` shape. A bounding box can have ``[..., 4]`` shape. @@ -371,8 +371,8 @@ def _transform( return F.five_crop(inpt, self.size) def _check_inputs(self, flat_inputs: List[Any]) -> None: - if has_any(flat_inputs, datapoints.BoundingBoxes, datapoints.Mask): - raise TypeError(f"BoundingBoxes'es and Mask's are not supported by {type(self).__name__}()") + if has_any(flat_inputs, datapoints.BBoxes, datapoints.Mask): + raise TypeError(f"BBoxes'es and Mask's are not supported by {type(self).__name__}()") class TenCrop(Transform): @@ -414,8 +414,8 @@ def __init__(self, size: Union[int, Sequence[int]], vertical_flip: bool = False) self.vertical_flip = vertical_flip def _check_inputs(self, flat_inputs: List[Any]) -> None: - if has_any(flat_inputs, datapoints.BoundingBoxes, datapoints.Mask): - raise TypeError(f"BoundingBoxes'es and Mask's are not supported by {type(self).__name__}()") + if has_any(flat_inputs, datapoints.BBoxes, datapoints.Mask): + raise TypeError(f"BBoxes'es and Mask's are not supported by {type(self).__name__}()") def _transform( self, inpt: Union[datapoints._ImageType, datapoints._VideoType], params: Dict[str, Any] @@ -440,7 +440,7 @@ class Pad(Transform): .. v2betastatus:: Pad transform If the input is a :class:`torch.Tensor` or a ``Datapoint`` (e.g. :class:`~torchvision.datapoints.Image`, - :class:`~torchvision.datapoints.Video`, :class:`~torchvision.datapoints.BoundingBoxes` etc.) + :class:`~torchvision.datapoints.Video`, :class:`~torchvision.datapoints.BBoxes` etc.) it can have arbitrary number of leading batch dimensions. For example, the image can have ``[..., C, H, W]`` shape. A bounding box can have ``[..., 4]`` shape. @@ -525,7 +525,7 @@ class RandomZoomOut(_RandomApplyTransform): output_height = input_height * r If the input is a :class:`torch.Tensor` or a ``Datapoint`` (e.g. :class:`~torchvision.datapoints.Image`, - :class:`~torchvision.datapoints.Video`, :class:`~torchvision.datapoints.BoundingBoxes` etc.) + :class:`~torchvision.datapoints.Video`, :class:`~torchvision.datapoints.BBoxes` etc.) it can have arbitrary number of leading batch dimensions. For example, the image can have ``[..., C, H, W]`` shape. A bounding box can have ``[..., 4]`` shape. @@ -584,7 +584,7 @@ class RandomRotation(Transform): .. v2betastatus:: RandomRotation transform If the input is a :class:`torch.Tensor` or a ``Datapoint`` (e.g. :class:`~torchvision.datapoints.Image`, - :class:`~torchvision.datapoints.Video`, :class:`~torchvision.datapoints.BoundingBoxes` etc.) + :class:`~torchvision.datapoints.Video`, :class:`~torchvision.datapoints.BBoxes` etc.) it can have arbitrary number of leading batch dimensions. For example, the image can have ``[..., C, H, W]`` shape. A bounding box can have ``[..., 4]`` shape. @@ -657,7 +657,7 @@ class RandomAffine(Transform): .. v2betastatus:: RandomAffine transform If the input is a :class:`torch.Tensor` or a ``Datapoint`` (e.g. :class:`~torchvision.datapoints.Image`, - :class:`~torchvision.datapoints.Video`, :class:`~torchvision.datapoints.BoundingBoxes` etc.) 
+ :class:`~torchvision.datapoints.Video`, :class:`~torchvision.datapoints.BBoxes` etc.) it can have arbitrary number of leading batch dimensions. For example, the image can have ``[..., C, H, W]`` shape. A bounding box can have ``[..., 4]`` shape. @@ -778,7 +778,7 @@ class RandomCrop(Transform): .. v2betastatus:: RandomCrop transform If the input is a :class:`torch.Tensor` or a ``Datapoint`` (e.g. :class:`~torchvision.datapoints.Image`, - :class:`~torchvision.datapoints.Video`, :class:`~torchvision.datapoints.BoundingBoxes` etc.) + :class:`~torchvision.datapoints.Video`, :class:`~torchvision.datapoints.BBoxes` etc.) it can have arbitrary number of leading batch dimensions. For example, the image can have ``[..., C, H, W]`` shape. A bounding box can have ``[..., 4]`` shape. @@ -933,7 +933,7 @@ class RandomPerspective(_RandomApplyTransform): .. v2betastatus:: RandomPerspective transform If the input is a :class:`torch.Tensor` or a ``Datapoint`` (e.g. :class:`~torchvision.datapoints.Image`, - :class:`~torchvision.datapoints.Video`, :class:`~torchvision.datapoints.BoundingBoxes` etc.) + :class:`~torchvision.datapoints.Video`, :class:`~torchvision.datapoints.BBoxes` etc.) it can have arbitrary number of leading batch dimensions. For example, the image can have ``[..., C, H, W]`` shape. A bounding box can have ``[..., 4]`` shape. @@ -1019,7 +1019,7 @@ class ElasticTransform(Transform): .. v2betastatus:: RandomPerspective transform If the input is a :class:`torch.Tensor` or a ``Datapoint`` (e.g. :class:`~torchvision.datapoints.Image`, - :class:`~torchvision.datapoints.Video`, :class:`~torchvision.datapoints.BoundingBoxes` etc.) + :class:`~torchvision.datapoints.Video`, :class:`~torchvision.datapoints.BBoxes` etc.) it can have arbitrary number of leading batch dimensions. For example, the image can have ``[..., C, H, W]`` shape. A bounding box can have ``[..., 4]`` shape. @@ -1110,15 +1110,15 @@ class RandomIoUCrop(Transform): .. v2betastatus:: RandomIoUCrop transform - This transformation requires an image or video data and ``datapoints.BoundingBoxes`` in the input. + This transformation requires an image or video data and ``datapoints.BBoxes`` in the input. .. warning:: In order to properly remove the bounding boxes below the IoU threshold, `RandomIoUCrop` - must be followed by :class:`~torchvision.transforms.v2.SanitizeBoundingBoxes`, either immediately + must be followed by :class:`~torchvision.transforms.v2.SanitizeBBoxes`, either immediately after or later in the transforms pipeline. If the input is a :class:`torch.Tensor` or a ``Datapoint`` (e.g. :class:`~torchvision.datapoints.Image`, - :class:`~torchvision.datapoints.Video`, :class:`~torchvision.datapoints.BoundingBoxes` etc.) + :class:`~torchvision.datapoints.Video`, :class:`~torchvision.datapoints.BBoxes` etc.) it can have arbitrary number of leading batch dimensions. For example, the image can have ``[..., C, H, W]`` shape. A bounding box can have ``[..., 4]`` shape. 
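As the warning in the RandomIoUCrop docstring above states, the crop only marks out-of-crop boxes as degenerate; SanitizeBBoxes (the renamed SanitizeBoundingBoxes) has to run afterwards to actually drop them together with their labels. A hedged sketch of that pairing, with made-up sample data and the default "labels" key that SanitizeBBoxes looks up:

.. code:: python

    import torch
    from torchvision import datapoints
    from torchvision.transforms import v2

    sample = {
        "image": datapoints.Image(torch.randint(0, 256, (3, 100, 100), dtype=torch.uint8)),
        "boxes": datapoints.BBoxes(
            [[10, 10, 40, 40], [60, 60, 95, 95]],
            format=datapoints.BBoxFormat.XYXY,
            spatial_size=(100, 100),
        ),
        "labels": torch.tensor([1, 2]),
    }

    transform = v2.Compose([
        v2.RandomIoUCrop(),
        # Boxes whose centers fell outside the sampled crop were zeroed out by
        # RandomIoUCrop; SanitizeBBoxes removes them and their labels here.
        v2.SanitizeBBoxes(),
    ])

    out = transform(sample)
    print(out["boxes"].shape, out["labels"].shape)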
@@ -1155,7 +1155,7 @@ def __init__( def _check_inputs(self, flat_inputs: List[Any]) -> None: if not ( - has_all(flat_inputs, datapoints.BoundingBoxes) + has_all(flat_inputs, datapoints.BBoxes) and has_any(flat_inputs, PIL.Image.Image, datapoints.Image, is_simple_tensor) ): raise TypeError( @@ -1165,7 +1165,7 @@ def _check_inputs(self, flat_inputs: List[Any]) -> None: def _get_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: orig_h, orig_w = query_spatial_size(flat_inputs) - bboxes = query_bounding_boxes(flat_inputs) + bboxes = query_bboxes(flat_inputs) while True: # sample an option @@ -1193,8 +1193,8 @@ def _get_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: continue # check for any valid boxes with centers within the crop area - xyxy_bboxes = F.convert_format_bounding_boxes( - bboxes.as_subclass(torch.Tensor), bboxes.format, datapoints.BoundingBoxFormat.XYXY + xyxy_bboxes = F.convert_format_bboxes( + bboxes.as_subclass(torch.Tensor), bboxes.format, datapoints.BBoxFormat.XYXY ) cx = 0.5 * (xyxy_bboxes[..., 0] + xyxy_bboxes[..., 2]) cy = 0.5 * (xyxy_bboxes[..., 1] + xyxy_bboxes[..., 3]) @@ -1220,9 +1220,9 @@ def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: output = F.crop(inpt, top=params["top"], left=params["left"], height=params["height"], width=params["width"]) - if isinstance(output, datapoints.BoundingBoxes): + if isinstance(output, datapoints.BBoxes): # We "mark" the invalid boxes as degenreate, and they can be - # removed by a later call to SanitizeBoundingBoxes() + # removed by a later call to SanitizeBBoxes() output[~params["is_within_crop_area"]] = 0 return output @@ -1235,7 +1235,7 @@ class ScaleJitter(Transform): .. v2betastatus:: ScaleJitter transform If the input is a :class:`torch.Tensor` or a ``Datapoint`` (e.g. :class:`~torchvision.datapoints.Image`, - :class:`~torchvision.datapoints.Video`, :class:`~torchvision.datapoints.BoundingBoxes` etc.) + :class:`~torchvision.datapoints.Video`, :class:`~torchvision.datapoints.BBoxes` etc.) it can have arbitrary number of leading batch dimensions. For example, the image can have ``[..., C, H, W]`` shape. A bounding box can have ``[..., 4]`` shape. @@ -1301,7 +1301,7 @@ class RandomShortestSize(Transform): .. v2betastatus:: RandomShortestSize transform If the input is a :class:`torch.Tensor` or a ``Datapoint`` (e.g. :class:`~torchvision.datapoints.Image`, - :class:`~torchvision.datapoints.Video`, :class:`~torchvision.datapoints.BoundingBoxes` etc.) + :class:`~torchvision.datapoints.Video`, :class:`~torchvision.datapoints.BBoxes` etc.) it can have arbitrary number of leading batch dimensions. For example, the image can have ``[..., C, H, W]`` shape. A bounding box can have ``[..., 4]`` shape. @@ -1380,7 +1380,7 @@ class RandomResize(Transform): output_height = size If the input is a :class:`torch.Tensor` or a ``Datapoint`` (e.g. :class:`~torchvision.datapoints.Image`, - :class:`~torchvision.datapoints.Video`, :class:`~torchvision.datapoints.BoundingBoxes` etc.) + :class:`~torchvision.datapoints.Video`, :class:`~torchvision.datapoints.BBoxes` etc.) it can have arbitrary number of leading batch dimensions. For example, the image can have ``[..., C, H, W]`` shape. A bounding box can have ``[..., 4]`` shape. 
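The _get_params logic above relies on the renamed functional helpers: boxes are converted to XYXY once, after which the centers are simple midpoints of the corner coordinates. The same call works standalone on the plain tensor view; a small numeric sketch (format and function names as renamed in this patch):

.. code:: python

    import torch
    from torchvision import datapoints
    from torchvision.transforms.v2 import functional as F

    # A single CXCYWH box on a 100x100 image.
    boxes = datapoints.BBoxes(
        [[50.0, 50.0, 20.0, 10.0]],
        format=datapoints.BBoxFormat.CXCYWH,
        spatial_size=(100, 100),
    )

    # Convert the plain tensor view to XYXY, as RandomIoUCrop._get_params does.
    xyxy = F.convert_format_bboxes(
        boxes.as_subclass(torch.Tensor),
        old_format=boxes.format,
        new_format=datapoints.BBoxFormat.XYXY,
    )
    cx = 0.5 * (xyxy[..., 0] + xyxy[..., 2])
    cy = 0.5 * (xyxy[..., 1] + xyxy[..., 3])
    print(xyxy.tolist(), cx.item(), cy.item())  # [[40.0, 45.0, 60.0, 55.0]] 50.0 50.0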
diff --git a/torchvision/transforms/v2/_meta.py b/torchvision/transforms/v2/_meta.py index 71cc159c907..472ec185f6a 100644 --- a/torchvision/transforms/v2/_meta.py +++ b/torchvision/transforms/v2/_meta.py @@ -4,39 +4,39 @@ from torchvision.transforms.v2 import functional as F, Transform -class ConvertBoundingBoxFormat(Transform): +class ConvertBBoxFormat(Transform): """[BETA] Convert bounding box coordinates to the given ``format``, eg from "CXCYWH" to "XYXY". - .. v2betastatus:: ConvertBoundingBoxFormat transform + .. v2betastatus:: ConvertBBoxFormat transform Args: - format (str or datapoints.BoundingBoxFormat): output bounding box format. - Possible values are defined by :class:`~torchvision.datapoints.BoundingBoxFormat` and + format (str or datapoints.BBoxFormat): output bounding box format. + Possible values are defined by :class:`~torchvision.datapoints.BBoxFormat` and string values match the enums, e.g. "XYXY" or "XYWH" etc. """ - _transformed_types = (datapoints.BoundingBoxes,) + _transformed_types = (datapoints.BBoxes,) - def __init__(self, format: Union[str, datapoints.BoundingBoxFormat]) -> None: + def __init__(self, format: Union[str, datapoints.BBoxFormat]) -> None: super().__init__() if isinstance(format, str): - format = datapoints.BoundingBoxFormat[format] + format = datapoints.BBoxFormat[format] self.format = format - def _transform(self, inpt: datapoints.BoundingBoxes, params: Dict[str, Any]) -> datapoints.BoundingBoxes: - return F.convert_format_bounding_boxes(inpt, new_format=self.format) # type: ignore[return-value] + def _transform(self, inpt: datapoints.BBoxes, params: Dict[str, Any]) -> datapoints.BBoxes: + return F.convert_format_bboxes(inpt, new_format=self.format) # type: ignore[return-value] -class ClampBoundingBoxes(Transform): +class ClampBBoxes(Transform): """[BETA] Clamp bounding boxes to their corresponding image dimensions. The clamping is done according to the bounding boxes' ``spatial_size`` meta-data. - .. v2betastatus:: ClampBoundingBoxes transform + .. v2betastatus:: ClampBBoxes transform """ - _transformed_types = (datapoints.BoundingBoxes,) + _transformed_types = (datapoints.BBoxes,) - def _transform(self, inpt: datapoints.BoundingBoxes, params: Dict[str, Any]) -> datapoints.BoundingBoxes: - return F.clamp_bounding_boxes(inpt) # type: ignore[return-value] + def _transform(self, inpt: datapoints.BBoxes, params: Dict[str, Any]) -> datapoints.BBoxes: + return F.clamp_bboxes(inpt) # type: ignore[return-value] diff --git a/torchvision/transforms/v2/_misc.py b/torchvision/transforms/v2/_misc.py index a4cb594b2b3..5cc8993f117 100644 --- a/torchvision/transforms/v2/_misc.py +++ b/torchvision/transforms/v2/_misc.py @@ -10,7 +10,7 @@ from torchvision.transforms.v2 import functional as F, Transform from ._utils import _parse_labels_getter, _setup_float_or_seq, _setup_size -from .utils import has_any, is_simple_tensor, query_bounding_boxes +from .utils import has_any, is_simple_tensor, query_bboxes # TODO: do we want/need to expose this? @@ -332,16 +332,16 @@ def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: return F.to_dtype(inpt, dtype=self.dtype, scale=True) -class SanitizeBoundingBoxes(Transform): +class SanitizeBBoxes(Transform): """[BETA] Remove degenerate/invalid bounding boxes and their corresponding labels and masks. - .. v2betastatus:: SanitizeBoundingBoxes transform + .. 
v2betastatus:: SanitizeBBoxes transform This transform removes bounding boxes and their associated labels/masks that: - are below a given ``min_size``: by default this also removes degenerate boxes that have e.g. X2 <= X1. - have any coordinate outside of their corresponding image. You may want to - call :class:`~torchvision.transforms.v2.ClampBoundingBoxes` first to avoid undesired removals. + call :class:`~torchvision.transforms.v2.ClampBBoxes` first to avoid undesired removals. It is recommended to call it at the end of a pipeline, before passing the input to the models. It is critical to call this transform if @@ -384,10 +384,10 @@ def forward(self, *inputs: Any) -> Any: ) flat_inputs, spec = tree_flatten(inputs) - # TODO: this enforces one single BoundingBoxes entry. + # TODO: this enforces one single BBoxes entry. # Assuming this transform needs to be called at the end of *any* pipeline that has bboxes... # should we just enforce it for all transforms?? What are the benefits of *not* enforcing this? - boxes = query_bounding_boxes(flat_inputs) + boxes = query_bboxes(flat_inputs) if boxes.ndim != 2: raise ValueError(f"boxes must be of shape (num_boxes, 4), got {boxes.shape}") @@ -398,10 +398,10 @@ def forward(self, *inputs: Any) -> Any: ) boxes = cast( - datapoints.BoundingBoxes, - F.convert_format_bounding_boxes( + datapoints.BBoxes, + F.convert_format_bboxes( boxes, - new_format=datapoints.BoundingBoxFormat.XYXY, + new_format=datapoints.BBoxFormat.XYXY, ), ) ws, hs = boxes[:, 2] - boxes[:, 0], boxes[:, 3] - boxes[:, 1] @@ -415,7 +415,7 @@ def forward(self, *inputs: Any) -> Any: params = dict(valid=valid, labels=labels) flat_outputs = [ # Even-though it may look like we're transforming all inputs, we don't: - # _transform() will only care about BoundingBoxeses and the labels + # _transform() will only care about BBoxeses and the labels self._transform(inpt, params) for inpt in flat_inputs ] @@ -424,9 +424,9 @@ def forward(self, *inputs: Any) -> Any: def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: is_label = inpt is not None and inpt is params["labels"] - is_bounding_boxes_or_mask = isinstance(inpt, (datapoints.BoundingBoxes, datapoints.Mask)) + is_bboxes_or_mask = isinstance(inpt, (datapoints.BBoxes, datapoints.Mask)) - if not (is_label or is_bounding_boxes_or_mask): + if not (is_label or is_bboxes_or_mask): return inpt output = inpt[params["valid"]] diff --git a/torchvision/transforms/v2/functional/__init__.py b/torchvision/transforms/v2/functional/__init__.py index 16f5ff50071..c841eae38a2 100644 --- a/torchvision/transforms/v2/functional/__init__.py +++ b/torchvision/transforms/v2/functional/__init__.py @@ -3,8 +3,8 @@ from ._utils import is_simple_tensor # usort: skip from ._meta import ( - clamp_bounding_boxes, - convert_format_bounding_boxes, + clamp_bboxes, + convert_format_bboxes, get_dimensions_image_tensor, get_dimensions_image_pil, get_dimensions, @@ -15,7 +15,7 @@ get_num_channels_image_pil, get_num_channels_video, get_num_channels, - get_spatial_size_bounding_boxes, + get_spatial_size_bboxes, get_spatial_size_image_tensor, get_spatial_size_image_pil, get_spatial_size_mask, @@ -76,25 +76,25 @@ ) from ._geometry import ( affine, - affine_bounding_boxes, + affine_bboxes, affine_image_pil, affine_image_tensor, affine_mask, affine_video, center_crop, - center_crop_bounding_boxes, + center_crop_bboxes, center_crop_image_pil, center_crop_image_tensor, center_crop_mask, center_crop_video, crop, - crop_bounding_boxes, + crop_bboxes, crop_image_pil, 
crop_image_tensor, crop_mask, crop_video, elastic, - elastic_bounding_boxes, + elastic_bboxes, elastic_image_pil, elastic_image_tensor, elastic_mask, @@ -106,37 +106,37 @@ five_crop_video, hflip, # TODO: Consider moving all pure alias definitions at the bottom of the file horizontal_flip, - horizontal_flip_bounding_boxes, + horizontal_flip_bboxes, horizontal_flip_image_pil, horizontal_flip_image_tensor, horizontal_flip_mask, horizontal_flip_video, pad, - pad_bounding_boxes, + pad_bboxes, pad_image_pil, pad_image_tensor, pad_mask, pad_video, perspective, - perspective_bounding_boxes, + perspective_bboxes, perspective_image_pil, perspective_image_tensor, perspective_mask, perspective_video, resize, - resize_bounding_boxes, + resize_bboxes, resize_image_pil, resize_image_tensor, resize_mask, resize_video, resized_crop, - resized_crop_bounding_boxes, + resized_crop_bboxes, resized_crop_image_pil, resized_crop_image_tensor, resized_crop_mask, resized_crop_video, rotate, - rotate_bounding_boxes, + rotate_bboxes, rotate_image_pil, rotate_image_tensor, rotate_mask, @@ -146,7 +146,7 @@ ten_crop_image_tensor, ten_crop_video, vertical_flip, - vertical_flip_bounding_boxes, + vertical_flip_bboxes, vertical_flip_image_pil, vertical_flip_image_tensor, vertical_flip_mask, diff --git a/torchvision/transforms/v2/functional/_geometry.py b/torchvision/transforms/v2/functional/_geometry.py index 469e58ff9c4..77d19e2e976 100644 --- a/torchvision/transforms/v2/functional/_geometry.py +++ b/torchvision/transforms/v2/functional/_geometry.py @@ -23,7 +23,7 @@ from torchvision.utils import _log_api_usage_once -from ._meta import clamp_bounding_boxes, convert_format_bounding_boxes, get_spatial_size_image_pil +from ._meta import clamp_bboxes, convert_format_bboxes, get_spatial_size_image_pil from ._utils import is_simple_tensor @@ -51,21 +51,21 @@ def horizontal_flip_mask(mask: torch.Tensor) -> torch.Tensor: return horizontal_flip_image_tensor(mask) -def horizontal_flip_bounding_boxes( - bounding_boxes: torch.Tensor, format: datapoints.BoundingBoxFormat, spatial_size: Tuple[int, int] +def horizontal_flip_bboxes( + bboxes: torch.Tensor, format: datapoints.BBoxFormat, spatial_size: Tuple[int, int] ) -> torch.Tensor: - shape = bounding_boxes.shape + shape = bboxes.shape - bounding_boxes = bounding_boxes.clone().reshape(-1, 4) + bboxes = bboxes.clone().reshape(-1, 4) - if format == datapoints.BoundingBoxFormat.XYXY: - bounding_boxes[:, [2, 0]] = bounding_boxes[:, [0, 2]].sub_(spatial_size[1]).neg_() - elif format == datapoints.BoundingBoxFormat.XYWH: - bounding_boxes[:, 0].add_(bounding_boxes[:, 2]).sub_(spatial_size[1]).neg_() - else: # format == datapoints.BoundingBoxFormat.CXCYWH: - bounding_boxes[:, 0].sub_(spatial_size[1]).neg_() + if format == datapoints.BBoxFormat.XYXY: + bboxes[:, [2, 0]] = bboxes[:, [0, 2]].sub_(spatial_size[1]).neg_() + elif format == datapoints.BBoxFormat.XYWH: + bboxes[:, 0].add_(bboxes[:, 2]).sub_(spatial_size[1]).neg_() + else: # format == datapoints.BBoxFormat.CXCYWH: + bboxes[:, 0].sub_(spatial_size[1]).neg_() - return bounding_boxes.reshape(shape) + return bboxes.reshape(shape) def horizontal_flip_video(video: torch.Tensor) -> torch.Tensor: @@ -101,21 +101,21 @@ def vertical_flip_mask(mask: torch.Tensor) -> torch.Tensor: return vertical_flip_image_tensor(mask) -def vertical_flip_bounding_boxes( - bounding_boxes: torch.Tensor, format: datapoints.BoundingBoxFormat, spatial_size: Tuple[int, int] +def vertical_flip_bboxes( + bboxes: torch.Tensor, format: datapoints.BBoxFormat, spatial_size: 
Tuple[int, int] ) -> torch.Tensor: - shape = bounding_boxes.shape + shape = bboxes.shape - bounding_boxes = bounding_boxes.clone().reshape(-1, 4) + bboxes = bboxes.clone().reshape(-1, 4) - if format == datapoints.BoundingBoxFormat.XYXY: - bounding_boxes[:, [1, 3]] = bounding_boxes[:, [3, 1]].sub_(spatial_size[0]).neg_() - elif format == datapoints.BoundingBoxFormat.XYWH: - bounding_boxes[:, 1].add_(bounding_boxes[:, 3]).sub_(spatial_size[0]).neg_() - else: # format == datapoints.BoundingBoxFormat.CXCYWH: - bounding_boxes[:, 1].sub_(spatial_size[0]).neg_() + if format == datapoints.BBoxFormat.XYXY: + bboxes[:, [1, 3]] = bboxes[:, [3, 1]].sub_(spatial_size[0]).neg_() + elif format == datapoints.BBoxFormat.XYWH: + bboxes[:, 1].add_(bboxes[:, 3]).sub_(spatial_size[0]).neg_() + else: # format == datapoints.BBoxFormat.CXCYWH: + bboxes[:, 1].sub_(spatial_size[0]).neg_() - return bounding_boxes.reshape(shape) + return bboxes.reshape(shape) def vertical_flip_video(video: torch.Tensor) -> torch.Tensor: @@ -274,20 +274,20 @@ def resize_mask(mask: torch.Tensor, size: List[int], max_size: Optional[int] = N return output -def resize_bounding_boxes( - bounding_boxes: torch.Tensor, spatial_size: Tuple[int, int], size: List[int], max_size: Optional[int] = None +def resize_bboxes( + bboxes: torch.Tensor, spatial_size: Tuple[int, int], size: List[int], max_size: Optional[int] = None ) -> Tuple[torch.Tensor, Tuple[int, int]]: old_height, old_width = spatial_size new_height, new_width = _compute_resized_output_size(spatial_size, size=size, max_size=max_size) if (new_height, new_width) == (old_height, old_width): - return bounding_boxes, spatial_size + return bboxes, spatial_size w_ratio = new_width / old_width h_ratio = new_height / old_height - ratios = torch.tensor([w_ratio, h_ratio, w_ratio, h_ratio], device=bounding_boxes.device) + ratios = torch.tensor([w_ratio, h_ratio, w_ratio, h_ratio], device=bboxes.device) return ( - bounding_boxes.mul(ratios).to(bounding_boxes.dtype), + bboxes.mul(ratios).to(bboxes.dtype), (new_height, new_width), ) @@ -650,9 +650,9 @@ def affine_image_pil( return _FP.affine(image, matrix, interpolation=pil_modes_mapping[interpolation], fill=fill) -def _affine_bounding_boxes_with_expand( - bounding_boxes: torch.Tensor, - format: datapoints.BoundingBoxFormat, +def _affine_bboxes_with_expand( + bboxes: torch.Tensor, + format: datapoints.BBoxFormat, spatial_size: Tuple[int, int], angle: Union[int, float], translate: List[float], @@ -661,18 +661,16 @@ def _affine_bounding_boxes_with_expand( center: Optional[List[float]] = None, expand: bool = False, ) -> Tuple[torch.Tensor, Tuple[int, int]]: - if bounding_boxes.numel() == 0: - return bounding_boxes, spatial_size - - original_shape = bounding_boxes.shape - original_dtype = bounding_boxes.dtype - bounding_boxes = bounding_boxes.clone() if bounding_boxes.is_floating_point() else bounding_boxes.float() - dtype = bounding_boxes.dtype - device = bounding_boxes.device - bounding_boxes = ( - convert_format_bounding_boxes( - bounding_boxes, old_format=format, new_format=datapoints.BoundingBoxFormat.XYXY, inplace=True - ) + if bboxes.numel() == 0: + return bboxes, spatial_size + + original_shape = bboxes.shape + original_dtype = bboxes.dtype + bboxes = bboxes.clone() if bboxes.is_floating_point() else bboxes.float() + dtype = bboxes.dtype + device = bboxes.device + bboxes = ( + convert_format_bboxes(bboxes, old_format=format, new_format=datapoints.BBoxFormat.XYXY, inplace=True) ).reshape(-1, 4) angle, translate, shear, center = 
_affine_parse_args( @@ -697,7 +695,7 @@ def _affine_bounding_boxes_with_expand( # Tensor of points has shape (N * 4, 3), where N is the number of bboxes # Single point structure is similar to # [(xmin, ymin, 1), (xmax, ymin, 1), (xmax, ymax, 1), (xmin, ymax, 1)] - points = bounding_boxes[:, [[0, 1], [2, 1], [2, 3], [0, 3]]].reshape(-1, 2) + points = bboxes[:, [[0, 1], [2, 1], [2, 3], [0, 3]]].reshape(-1, 2) points = torch.cat([points, torch.ones(points.shape[0], 1, device=device, dtype=dtype)], dim=-1) # 2) Now let's transform the points using affine matrix transformed_points = torch.matmul(points, transposed_affine_matrix) @@ -730,18 +728,18 @@ def _affine_bounding_boxes_with_expand( new_width, new_height = _compute_affine_output_size(affine_vector, width, height) spatial_size = (new_height, new_width) - out_bboxes = clamp_bounding_boxes(out_bboxes, format=datapoints.BoundingBoxFormat.XYXY, spatial_size=spatial_size) - out_bboxes = convert_format_bounding_boxes( - out_bboxes, old_format=datapoints.BoundingBoxFormat.XYXY, new_format=format, inplace=True + out_bboxes = clamp_bboxes(out_bboxes, format=datapoints.BBoxFormat.XYXY, spatial_size=spatial_size) + out_bboxes = convert_format_bboxes( + out_bboxes, old_format=datapoints.BBoxFormat.XYXY, new_format=format, inplace=True ).reshape(original_shape) out_bboxes = out_bboxes.to(original_dtype) return out_bboxes, spatial_size -def affine_bounding_boxes( - bounding_boxes: torch.Tensor, - format: datapoints.BoundingBoxFormat, +def affine_bboxes( + bboxes: torch.Tensor, + format: datapoints.BBoxFormat, spatial_size: Tuple[int, int], angle: Union[int, float], translate: List[float], @@ -749,8 +747,8 @@ def affine_bounding_boxes( shear: List[float], center: Optional[List[float]] = None, ) -> torch.Tensor: - out_box, _ = _affine_bounding_boxes_with_expand( - bounding_boxes, + out_box, _ = _affine_bboxes_with_expand( + bboxes, format=format, spatial_size=spatial_size, angle=angle, @@ -927,9 +925,9 @@ def rotate_image_pil( ) -def rotate_bounding_boxes( - bounding_boxes: torch.Tensor, - format: datapoints.BoundingBoxFormat, +def rotate_bboxes( + bboxes: torch.Tensor, + format: datapoints.BBoxFormat, spatial_size: Tuple[int, int], angle: float, expand: bool = False, @@ -938,8 +936,8 @@ def rotate_bounding_boxes( if center is not None and expand: warnings.warn("The provided center argument has no effect on the result if expand is True") - return _affine_bounding_boxes_with_expand( - bounding_boxes, + return _affine_bboxes_with_expand( + bboxes, format=format, spatial_size=spatial_size, angle=-angle, @@ -1165,9 +1163,9 @@ def pad_mask( return output -def pad_bounding_boxes( - bounding_boxes: torch.Tensor, - format: datapoints.BoundingBoxFormat, +def pad_bboxes( + bboxes: torch.Tensor, + format: datapoints.BBoxFormat, spatial_size: Tuple[int, int], padding: List[int], padding_mode: str = "constant", @@ -1178,18 +1176,18 @@ def pad_bounding_boxes( left, right, top, bottom = _parse_pad_padding(padding) - if format == datapoints.BoundingBoxFormat.XYXY: + if format == datapoints.BBoxFormat.XYXY: pad = [left, top, left, top] else: pad = [left, top, 0, 0] - bounding_boxes = bounding_boxes + torch.tensor(pad, dtype=bounding_boxes.dtype, device=bounding_boxes.device) + bboxes = bboxes + torch.tensor(pad, dtype=bboxes.dtype, device=bboxes.device) height, width = spatial_size height += top + bottom width += left + right spatial_size = (height, width) - return clamp_bounding_boxes(bounding_boxes, format=format, spatial_size=spatial_size), spatial_size + return 
clamp_bboxes(bboxes, format=format, spatial_size=spatial_size), spatial_size def pad_video( @@ -1245,9 +1243,9 @@ def crop_image_tensor(image: torch.Tensor, top: int, left: int, height: int, wid crop_image_pil = _FP.crop -def crop_bounding_boxes( - bounding_boxes: torch.Tensor, - format: datapoints.BoundingBoxFormat, +def crop_bboxes( + bboxes: torch.Tensor, + format: datapoints.BBoxFormat, top: int, left: int, height: int, @@ -1255,15 +1253,15 @@ def crop_bounding_boxes( ) -> Tuple[torch.Tensor, Tuple[int, int]]: # Crop or implicit pad if left and/or top have negative values: - if format == datapoints.BoundingBoxFormat.XYXY: + if format == datapoints.BBoxFormat.XYXY: sub = [left, top, left, top] else: sub = [left, top, 0, 0] - bounding_boxes = bounding_boxes - torch.tensor(sub, dtype=bounding_boxes.dtype, device=bounding_boxes.device) + bboxes = bboxes - torch.tensor(sub, dtype=bboxes.dtype, device=bboxes.device) spatial_size = (height, width) - return clamp_bounding_boxes(bounding_boxes, format=format, spatial_size=spatial_size), spatial_size + return clamp_bboxes(bboxes, format=format, spatial_size=spatial_size), spatial_size def crop_mask(mask: torch.Tensor, top: int, left: int, height: int, width: int) -> torch.Tensor: @@ -1409,27 +1407,25 @@ def perspective_image_pil( return _FP.perspective(image, perspective_coeffs, interpolation=pil_modes_mapping[interpolation], fill=fill) -def perspective_bounding_boxes( - bounding_boxes: torch.Tensor, - format: datapoints.BoundingBoxFormat, +def perspective_bboxes( + bboxes: torch.Tensor, + format: datapoints.BBoxFormat, spatial_size: Tuple[int, int], startpoints: Optional[List[List[int]]], endpoints: Optional[List[List[int]]], coefficients: Optional[List[float]] = None, ) -> torch.Tensor: - if bounding_boxes.numel() == 0: - return bounding_boxes + if bboxes.numel() == 0: + return bboxes perspective_coeffs = _perspective_coefficients(startpoints, endpoints, coefficients) - original_shape = bounding_boxes.shape - # TODO: first cast to float if bbox is int64 before convert_format_bounding_boxes - bounding_boxes = ( - convert_format_bounding_boxes(bounding_boxes, old_format=format, new_format=datapoints.BoundingBoxFormat.XYXY) - ).reshape(-1, 4) + original_shape = bboxes.shape + # TODO: first cast to float if bbox is int64 before convert_format_bboxes + bboxes = (convert_format_bboxes(bboxes, old_format=format, new_format=datapoints.BBoxFormat.XYXY)).reshape(-1, 4) - dtype = bounding_boxes.dtype if torch.is_floating_point(bounding_boxes) else torch.float32 - device = bounding_boxes.device + dtype = bboxes.dtype if torch.is_floating_point(bboxes) else torch.float32 + device = bboxes.device # perspective_coeffs are computed as endpoint -> start point # We have to invert perspective_coeffs for bboxes: @@ -1475,7 +1471,7 @@ def perspective_bounding_boxes( # Tensor of points has shape (N * 4, 3), where N is the number of bboxes # Single point structure is similar to # [(xmin, ymin, 1), (xmax, ymin, 1), (xmax, ymax, 1), (xmin, ymax, 1)] - points = bounding_boxes[:, [[0, 1], [2, 1], [2, 3], [0, 3]]].reshape(-1, 2) + points = bboxes[:, [[0, 1], [2, 1], [2, 3], [0, 3]]].reshape(-1, 2) points = torch.cat([points, torch.ones(points.shape[0], 1, device=points.device)], dim=-1) # 2) Now let's transform the points using perspective matrices # x_out = (coeffs[0] * x + coeffs[1] * y + coeffs[2]) / (coeffs[6] * x + coeffs[7] * y + 1) @@ -1490,16 +1486,16 @@ def perspective_bounding_boxes( transformed_points = transformed_points.reshape(-1, 4, 2) out_bbox_mins, 
-    out_bboxes = clamp_bounding_boxes(
-        torch.cat([out_bbox_mins, out_bbox_maxs], dim=1).to(bounding_boxes.dtype),
-        format=datapoints.BoundingBoxFormat.XYXY,
+    out_bboxes = clamp_bboxes(
+        torch.cat([out_bbox_mins, out_bbox_maxs], dim=1).to(bboxes.dtype),
+        format=datapoints.BBoxFormat.XYXY,
         spatial_size=spatial_size,
     )

     # out_bboxes should be of shape [N boxes, 4]
-    return convert_format_bounding_boxes(
-        out_bboxes, old_format=datapoints.BoundingBoxFormat.XYXY, new_format=format, inplace=True
+    return convert_format_bboxes(
+        out_bboxes, old_format=datapoints.BBoxFormat.XYXY, new_format=format, inplace=True
     ).reshape(original_shape)
@@ -1648,27 +1644,25 @@ def _create_identity_grid(size: Tuple[int, int], device: torch.device, dtype: to
     return base_grid


-def elastic_bounding_boxes(
-    bounding_boxes: torch.Tensor,
-    format: datapoints.BoundingBoxFormat,
+def elastic_bboxes(
+    bboxes: torch.Tensor,
+    format: datapoints.BBoxFormat,
     spatial_size: Tuple[int, int],
     displacement: torch.Tensor,
 ) -> torch.Tensor:
-    if bounding_boxes.numel() == 0:
-        return bounding_boxes
+    if bboxes.numel() == 0:
+        return bboxes

     # TODO: add in docstring about approximation we are doing for grid inversion
-    device = bounding_boxes.device
-    dtype = bounding_boxes.dtype if torch.is_floating_point(bounding_boxes) else torch.float32
+    device = bboxes.device
+    dtype = bboxes.dtype if torch.is_floating_point(bboxes) else torch.float32

     if displacement.dtype != dtype or displacement.device != device:
         displacement = displacement.to(dtype=dtype, device=device)

-    original_shape = bounding_boxes.shape
-    # TODO: first cast to float if bbox is int64 before convert_format_bounding_boxes
-    bounding_boxes = (
-        convert_format_bounding_boxes(bounding_boxes, old_format=format, new_format=datapoints.BoundingBoxFormat.XYXY)
-    ).reshape(-1, 4)
+    original_shape = bboxes.shape
+    # TODO: first cast to float if bbox is int64 before convert_format_bboxes
+    bboxes = (convert_format_bboxes(bboxes, old_format=format, new_format=datapoints.BBoxFormat.XYXY)).reshape(-1, 4)

     id_grid = _create_identity_grid(spatial_size, device=device, dtype=dtype)
     # We construct an approximation of inverse grid as inv_grid = id_grid - displacement
@@ -1676,7 +1670,7 @@ def elastic_bounding_boxes(
     inv_grid = id_grid.sub_(displacement)

     # Get points from bboxes
-    points = bounding_boxes[:, [[0, 1], [2, 1], [2, 3], [0, 3]]].reshape(-1, 2)
+    points = bboxes[:, [[0, 1], [2, 1], [2, 3], [0, 3]]].reshape(-1, 2)
     if points.is_floating_point():
         points = points.ceil_()
     index_xy = points.to(dtype=torch.long)
@@ -1688,14 +1682,14 @@ def elastic_bounding_boxes(

     transformed_points = transformed_points.reshape(-1, 4, 2)
     out_bbox_mins, out_bbox_maxs = torch.aminmax(transformed_points, dim=1)
-    out_bboxes = clamp_bounding_boxes(
-        torch.cat([out_bbox_mins, out_bbox_maxs], dim=1).to(bounding_boxes.dtype),
-        format=datapoints.BoundingBoxFormat.XYXY,
+    out_bboxes = clamp_bboxes(
+        torch.cat([out_bbox_mins, out_bbox_maxs], dim=1).to(bboxes.dtype),
+        format=datapoints.BBoxFormat.XYXY,
         spatial_size=spatial_size,
     )

-    return convert_format_bounding_boxes(
-        out_bboxes, old_format=datapoints.BoundingBoxFormat.XYXY, new_format=format, inplace=True
+    return convert_format_bboxes(
+        out_bboxes, old_format=datapoints.BBoxFormat.XYXY, new_format=format, inplace=True
     ).reshape(original_shape)
@@ -1818,17 +1812,15 @@ def center_crop_image_pil(image: PIL.Image.Image, output_size: List[int]) -> PIL
     return crop_image_pil(image, crop_top, crop_left, crop_height, crop_width)


-def center_crop_bounding_boxes(
-    bounding_boxes: torch.Tensor,
-    format: datapoints.BoundingBoxFormat,
+def center_crop_bboxes(
+    bboxes: torch.Tensor,
+    format: datapoints.BBoxFormat,
     spatial_size: Tuple[int, int],
     output_size: List[int],
 ) -> Tuple[torch.Tensor, Tuple[int, int]]:
     crop_height, crop_width = _center_crop_parse_output_size(output_size)
     crop_top, crop_left = _center_crop_compute_crop_anchor(crop_height, crop_width, *spatial_size)
-    return crop_bounding_boxes(
-        bounding_boxes, format, top=crop_top, left=crop_left, height=crop_height, width=crop_width
-    )
+    return crop_bboxes(bboxes, format, top=crop_top, left=crop_left, height=crop_height, width=crop_width)


 def center_crop_mask(mask: torch.Tensor, output_size: List[int]) -> torch.Tensor:
@@ -1895,17 +1887,17 @@ def resized_crop_image_pil(
     return resize_image_pil(image, size, interpolation=interpolation)


-def resized_crop_bounding_boxes(
-    bounding_boxes: torch.Tensor,
-    format: datapoints.BoundingBoxFormat,
+def resized_crop_bboxes(
+    bboxes: torch.Tensor,
+    format: datapoints.BBoxFormat,
     top: int,
     left: int,
     height: int,
     width: int,
     size: List[int],
 ) -> Tuple[torch.Tensor, Tuple[int, int]]:
-    bounding_boxes, _ = crop_bounding_boxes(bounding_boxes, format, top, left, height, width)
-    return resize_bounding_boxes(bounding_boxes, spatial_size=(height, width), size=size)
+    bboxes, _ = crop_bboxes(bboxes, format, top, left, height, width)
+    return resize_bboxes(bboxes, spatial_size=(height, width), size=size)


 def resized_crop_mask(
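The box kernels renamed above (affine, rotate, perspective, elastic, crop, pad) all rely on the same corner trick spelled out in their comments: expand each XYXY box into its four corners, transform the corners, then take the per-box min/max to get an axis-aligned box back before clamping and converting to the caller's format. The snippet below is not part of the patch; it is a standalone sketch using only plain torch, and the helper name transform_xyxy_boxes is made up for the example.

import torch

def transform_xyxy_boxes(boxes_xyxy: torch.Tensor, affine_matrix: torch.Tensor) -> torch.Tensor:
    # boxes_xyxy: (N, 4) in XYXY, affine_matrix: (2, 3) mapping input -> output coordinates
    points = boxes_xyxy[:, [[0, 1], [2, 1], [2, 3], [0, 3]]].reshape(-1, 2)  # (N * 4, 2) corners
    ones = torch.ones(points.shape[0], 1, dtype=points.dtype, device=points.device)
    points = torch.cat([points, ones], dim=-1)   # homogeneous coordinates (N * 4, 3)
    transformed = points @ affine_matrix.T       # (N * 4, 2) transformed corners
    transformed = transformed.reshape(-1, 4, 2)
    mins, maxs = torch.aminmax(transformed, dim=1)  # per-box extremes
    return torch.cat([mins, maxs], dim=1)           # back to XYXY

boxes = torch.tensor([[10.0, 10.0, 20.0, 30.0]])
shift = torch.tensor([[1.0, 0.0, 5.0], [0.0, 1.0, 5.0]])  # translate by (+5, +5)
print(transform_xyxy_boxes(boxes, shift))  # tensor([[15., 15., 25., 35.]])

Taking the min/max is what keeps the result axis-aligned; under rotation or shear the box generally grows, which is why the real kernels clamp to spatial_size afterwards.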
diff --git a/torchvision/transforms/v2/functional/_meta.py b/torchvision/transforms/v2/functional/_meta.py
index f564b180389..fa4c638b093 100644
--- a/torchvision/transforms/v2/functional/_meta.py
+++ b/torchvision/transforms/v2/functional/_meta.py
@@ -3,7 +3,7 @@
 import PIL.Image
 import torch
 from torchvision import datapoints
-from torchvision.datapoints import BoundingBoxFormat
+from torchvision.datapoints import BBoxFormat
 from torchvision.transforms import _functional_pil as _FP
 from torchvision.utils import _log_api_usage_once

@@ -109,8 +109,8 @@ def get_spatial_size_mask(mask: torch.Tensor) -> List[int]:


 @torch.jit.unused
-def get_spatial_size_bounding_boxes(bounding_boxes: datapoints.BoundingBoxes) -> List[int]:
-    return list(bounding_boxes.spatial_size)
+def get_spatial_size_bboxes(bboxes: datapoints.BBoxes) -> List[int]:
+    return list(bboxes.spatial_size)


 def get_spatial_size(inpt: datapoints._InputTypeJIT) -> List[int]:
@@ -119,7 +119,7 @@ def get_spatial_size(inpt: datapoints._InputTypeJIT) -> List[int]:

     if torch.jit.is_scripting() or is_simple_tensor(inpt):
         return get_spatial_size_image_tensor(inpt)
-    elif isinstance(inpt, (datapoints.Image, datapoints.Video, datapoints.BoundingBoxes, datapoints.Mask)):
+    elif isinstance(inpt, (datapoints.Image, datapoints.Video, datapoints.BBoxes, datapoints.Mask)):
         return list(inpt.spatial_size)
     elif isinstance(inpt, PIL.Image.Image):
         return get_spatial_size_image_pil(inpt)
@@ -185,97 +185,89 @@ def _xyxy_to_cxcywh(xyxy: torch.Tensor, inplace: bool) -> torch.Tensor:
     return xyxy


-def _convert_format_bounding_boxes(
-    bounding_boxes: torch.Tensor, old_format: BoundingBoxFormat, new_format: BoundingBoxFormat, inplace: bool = False
+def _convert_format_bboxes(
+    bboxes: torch.Tensor, old_format: BBoxFormat, new_format: BBoxFormat, inplace: bool = False
 ) -> torch.Tensor:
     if new_format == old_format:
-        return bounding_boxes
+        return bboxes

     # TODO: Add _xywh_to_cxcywh and _cxcywh_to_xywh to improve performance
-    if old_format == BoundingBoxFormat.XYWH:
-        bounding_boxes = _xywh_to_xyxy(bounding_boxes, inplace)
-    elif old_format == BoundingBoxFormat.CXCYWH:
-        bounding_boxes = _cxcywh_to_xyxy(bounding_boxes, inplace)
+    if old_format == BBoxFormat.XYWH:
+        bboxes = _xywh_to_xyxy(bboxes, inplace)
+    elif old_format == BBoxFormat.CXCYWH:
+        bboxes = _cxcywh_to_xyxy(bboxes, inplace)

-    if new_format == BoundingBoxFormat.XYWH:
-        bounding_boxes = _xyxy_to_xywh(bounding_boxes, inplace)
-    elif new_format == BoundingBoxFormat.CXCYWH:
-        bounding_boxes = _xyxy_to_cxcywh(bounding_boxes, inplace)
+    if new_format == BBoxFormat.XYWH:
+        bboxes = _xyxy_to_xywh(bboxes, inplace)
+    elif new_format == BBoxFormat.CXCYWH:
+        bboxes = _xyxy_to_cxcywh(bboxes, inplace)

-    return bounding_boxes
+    return bboxes


-def convert_format_bounding_boxes(
+def convert_format_bboxes(
     inpt: datapoints._InputTypeJIT,
-    old_format: Optional[BoundingBoxFormat] = None,
-    new_format: Optional[BoundingBoxFormat] = None,
+    old_format: Optional[BBoxFormat] = None,
+    new_format: Optional[BBoxFormat] = None,
     inplace: bool = False,
 ) -> datapoints._InputTypeJIT:
     # This being a kernel / dispatcher hybrid, we need an option to pass `old_format` explicitly for simple tensor
-    # inputs as well as extract it from `datapoints.BoundingBoxes` inputs. However, putting a default value on
+    # inputs as well as extract it from `datapoints.BBoxes` inputs. However, putting a default value on
     # `old_format` means we also need to put one on `new_format` to have syntactically correct Python. Here we mimic the
     # default error that would be thrown if `new_format` had no default value.
     if new_format is None:
-        raise TypeError("convert_format_bounding_boxes() missing 1 required argument: 'new_format'")
+        raise TypeError("convert_format_bboxes() missing 1 required argument: 'new_format'")

     if not torch.jit.is_scripting():
-        _log_api_usage_once(convert_format_bounding_boxes)
+        _log_api_usage_once(convert_format_bboxes)

     if torch.jit.is_scripting() or is_simple_tensor(inpt):
         if old_format is None:
             raise ValueError("For simple tensor inputs, `old_format` has to be passed.")
-        return _convert_format_bounding_boxes(inpt, old_format=old_format, new_format=new_format, inplace=inplace)
-    elif isinstance(inpt, datapoints.BoundingBoxes):
+        return _convert_format_bboxes(inpt, old_format=old_format, new_format=new_format, inplace=inplace)
+    elif isinstance(inpt, datapoints.BBoxes):
         if old_format is not None:
             raise ValueError("For bounding box datapoint inputs, `old_format` must not be passed.")
-        output = _convert_format_bounding_boxes(
+        output = _convert_format_bboxes(
             inpt.as_subclass(torch.Tensor), old_format=inpt.format, new_format=new_format, inplace=inplace
         )
-        return datapoints.BoundingBoxes.wrap_like(inpt, output, format=new_format)
+        return datapoints.BBoxes.wrap_like(inpt, output, format=new_format)
     else:
         raise TypeError(
             f"Input can either be a plain tensor or a bounding box datapoint, but got {type(inpt)} instead."
         )


-def _clamp_bounding_boxes(
-    bounding_boxes: torch.Tensor, format: BoundingBoxFormat, spatial_size: Tuple[int, int]
-) -> torch.Tensor:
+def _clamp_bboxes(bboxes: torch.Tensor, format: BBoxFormat, spatial_size: Tuple[int, int]) -> torch.Tensor:
     # TODO: Investigate if it makes sense from a performance perspective to have an implementation for every
-    # BoundingBoxFormat instead of converting back and forth
-    in_dtype = bounding_boxes.dtype
-    bounding_boxes = bounding_boxes.clone() if bounding_boxes.is_floating_point() else bounding_boxes.float()
-    xyxy_boxes = convert_format_bounding_boxes(
-        bounding_boxes, old_format=format, new_format=datapoints.BoundingBoxFormat.XYXY, inplace=True
-    )
+    # BBoxFormat instead of converting back and forth
+    in_dtype = bboxes.dtype
+    bboxes = bboxes.clone() if bboxes.is_floating_point() else bboxes.float()
+    xyxy_boxes = convert_format_bboxes(bboxes, old_format=format, new_format=datapoints.BBoxFormat.XYXY, inplace=True)
     xyxy_boxes[..., 0::2].clamp_(min=0, max=spatial_size[1])
     xyxy_boxes[..., 1::2].clamp_(min=0, max=spatial_size[0])
-    out_boxes = convert_format_bounding_boxes(
-        xyxy_boxes, old_format=BoundingBoxFormat.XYXY, new_format=format, inplace=True
-    )
+    out_boxes = convert_format_bboxes(xyxy_boxes, old_format=BBoxFormat.XYXY, new_format=format, inplace=True)
     return out_boxes.to(in_dtype)


-def clamp_bounding_boxes(
+def clamp_bboxes(
     inpt: datapoints._InputTypeJIT,
-    format: Optional[BoundingBoxFormat] = None,
+    format: Optional[BBoxFormat] = None,
     spatial_size: Optional[Tuple[int, int]] = None,
 ) -> datapoints._InputTypeJIT:
     if not torch.jit.is_scripting():
-        _log_api_usage_once(clamp_bounding_boxes)
+        _log_api_usage_once(clamp_bboxes)

     if torch.jit.is_scripting() or is_simple_tensor(inpt):
         if format is None or spatial_size is None:
             raise ValueError("For simple tensor inputs, `format` and `spatial_size` has to be passed.")
-        return _clamp_bounding_boxes(inpt, format=format, spatial_size=spatial_size)
-    elif isinstance(inpt, datapoints.BoundingBoxes):
+        return _clamp_bboxes(inpt, format=format, spatial_size=spatial_size)
+    elif isinstance(inpt, datapoints.BBoxes):
         if format is not None or spatial_size is not None:
             raise ValueError("For bounding box datapoint inputs, `format` and `spatial_size` must not be passed.")
-        output = _clamp_bounding_boxes(
-            inpt.as_subclass(torch.Tensor), format=inpt.format, spatial_size=inpt.spatial_size
-        )
-        return datapoints.BoundingBoxes.wrap_like(inpt, output)
+        output = _clamp_bboxes(inpt.as_subclass(torch.Tensor), format=inpt.format, spatial_size=inpt.spatial_size)
+        return datapoints.BBoxes.wrap_like(inpt, output)
     else:
         raise TypeError(
             f"Input can either be a plain tensor or a bounding box datapoint, but got {type(inpt)} instead."
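As the comment in the hunk above notes, convert_format_bboxes and clamp_bboxes are kernel/dispatcher hybrids: plain tensors must carry their metadata as explicit arguments, while box datapoints bring format and spatial_size with them. The usage sketch below is not part of the patch; it assumes the renamed functions are re-exported from torchvision.transforms.v2.functional and that datapoints.BBoxes keeps the BoundingBoxes(data, format=..., spatial_size=...) constructor.

import torch
from torchvision import datapoints
from torchvision.transforms.v2.functional import clamp_bboxes, convert_format_bboxes

xywh = torch.tensor([[5, 5, 20, 10]])  # one box as (x, y, width, height)

# Plain tensor branch: the metadata has to be passed explicitly.
xyxy = convert_format_bboxes(xywh, old_format=datapoints.BBoxFormat.XYWH, new_format=datapoints.BBoxFormat.XYXY)
print(xyxy)  # tensor([[ 5,  5, 25, 15]])
print(clamp_bboxes(xyxy, format=datapoints.BBoxFormat.XYXY, spatial_size=(10, 8)))
# tensor([[ 5,  5,  8, 10]])  -- x clipped to width 8, y clipped to height 10

# Datapoint branch: format and spatial_size come from the BBoxes object itself, so passing
# them again raises a ValueError, per the checks in the dispatchers above.
boxes = datapoints.BBoxes(xywh, format=datapoints.BBoxFormat.XYWH, spatial_size=(10, 8))
print(convert_format_bboxes(boxes, new_format=datapoints.BBoxFormat.XYXY))
print(clamp_bboxes(boxes))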
diff --git a/torchvision/transforms/v2/utils.py b/torchvision/transforms/v2/utils.py
index 978333296d0..37eca94a24e 100644
--- a/torchvision/transforms/v2/utils.py
+++ b/torchvision/transforms/v2/utils.py
@@ -9,13 +9,13 @@
 from torchvision.transforms.v2.functional import get_dimensions, get_spatial_size, is_simple_tensor


-def query_bounding_boxes(flat_inputs: List[Any]) -> datapoints.BoundingBoxes:
-    bounding_boxes = [inpt for inpt in flat_inputs if isinstance(inpt, datapoints.BoundingBoxes)]
-    if not bounding_boxes:
+def query_bboxes(flat_inputs: List[Any]) -> datapoints.BBoxes:
+    bboxes = [inpt for inpt in flat_inputs if isinstance(inpt, datapoints.BBoxes)]
+    if not bboxes:
         raise TypeError("No bounding box was found in the sample")
-    elif len(bounding_boxes) > 1:
+    elif len(bboxes) > 1:
         raise ValueError("Found multiple bounding boxes in the sample")
-    return bounding_boxes.pop()
+    return bboxes.pop()


 def query_chw(flat_inputs: List[Any]) -> Tuple[int, int, int]:
@@ -36,9 +36,7 @@ def query_spatial_size(flat_inputs: List[Any]) -> Tuple[int, int]:
     sizes = {
         tuple(get_spatial_size(inpt))
         for inpt in flat_inputs
-        if isinstance(
-            inpt, (datapoints.Image, PIL.Image.Image, datapoints.Video, datapoints.Mask, datapoints.BoundingBoxes)
-        )
+        if isinstance(inpt, (datapoints.Image, PIL.Image.Image, datapoints.Video, datapoints.Mask, datapoints.BBoxes))
         or is_simple_tensor(inpt)
     }
     if not sizes:
diff --git a/torchvision/utils.py b/torchvision/utils.py
index 6ec19a0e0a1..b78238a621c 100644
--- a/torchvision/utils.py
+++ b/torchvision/utils.py
@@ -13,7 +13,7 @@
 __all__ = [
     "make_grid",
     "save_image",
-    "draw_bounding_boxes",
+    "draw_bboxes",
     "draw_segmentation_masks",
     "draw_keypoints",
     "flow_to_image",
@@ -151,7 +151,7 @@ def save_image(


 @torch.no_grad()
-def draw_bounding_boxes(
+def draw_bboxes(
     image: torch.Tensor,
     boxes: torch.Tensor,
     labels: Optional[List[str]] = None,
@@ -189,7 +189,7 @@ def draw_bounding_boxes(
     """

     if not torch.jit.is_scripting() and not torch.jit.is_tracing():
-        _log_api_usage_once(draw_bounding_boxes)
+        _log_api_usage_once(draw_bboxes)
     if not isinstance(image, torch.Tensor):
         raise TypeError(f"Tensor expected, got {type(image)}")
     elif image.dtype != torch.uint8: