4 changes: 2 additions & 2 deletions docs/source/datapoints.rst
@@ -14,6 +14,6 @@ see e.g. :ref:`sphx_glr_auto_examples_plot_transforms_v2_e2e.py`.

Image
Video
-BoundingBoxFormat
-BoundingBoxes
+BBoxFormat
+BBoxes
Mask
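For orientation, a minimal sketch (not part of the diff) of constructing the renamed datapoint, assuming the BBoxes / BBoxFormat names proposed in this PR; the data and arguments mirror test_bbox_instance further down:

.. code:: python

    import torch
    from torchvision import datapoints

    # Two boxes in XYXY format on a 32x32 image, using the names proposed in this PR.
    boxes = datapoints.BBoxes(
        [[0, 0, 5, 5], [2, 2, 7, 7]],
        format=datapoints.BBoxFormat.XYXY,
        spatial_size=(32, 32),
    )
    assert isinstance(boxes, torch.Tensor)  # datapoints are Tensor subclasses
    assert boxes.format is datapoints.BBoxFormat.XYXY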
4 changes: 2 additions & 2 deletions docs/source/models.rst
@@ -413,7 +413,7 @@ Here is an example of how to use the pre-trained object detection models:

from torchvision.io.image import read_image
from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights
-from torchvision.utils import draw_bounding_boxes
+from torchvision.utils import draw_bboxes
Member Author: would have to revert changes to existing APIs

from torchvision.transforms.functional import to_pil_image

img = read_image("test/assets/encode_jpeg/grace_hopper_517x606.jpg")
@@ -432,7 +432,7 @@ Here is an example of how to use the pre-trained object detection models:
# Step 4: Use the model and visualize the prediction
prediction = model(batch)[0]
labels = [weights.meta["categories"][i] for i in prediction["labels"]]
-box = draw_bounding_boxes(img, boxes=prediction["boxes"],
+box = draw_bboxes(img, boxes=prediction["boxes"],
labels=labels,
colors="red",
width=4, font_size=30)
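For context, a self-contained version of the detection snippet above under the proposed rename. Only draw_bounding_boxes → draw_bboxes changes; steps 1-3, collapsed in the diff above, are assumed to follow the standard weights-API pattern from the existing models.rst example:

.. code:: python

    from torchvision.io.image import read_image
    from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights
    from torchvision.utils import draw_bboxes  # proposed rename of draw_bounding_boxes
    from torchvision.transforms.functional import to_pil_image

    img = read_image("test/assets/encode_jpeg/grace_hopper_517x606.jpg")

    # Step 1: Initialize model with the best available weights
    weights = FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT
    model = fasterrcnn_resnet50_fpn_v2(weights=weights, box_score_thresh=0.9)
    model.eval()

    # Step 2: Initialize the inference transforms
    preprocess = weights.transforms()

    # Step 3: Apply inference preprocessing transforms
    batch = [preprocess(img)]

    # Step 4: Use the model and visualize the prediction
    prediction = model(batch)[0]
    labels = [weights.meta["categories"][i] for i in prediction["labels"]]
    box = draw_bboxes(img, boxes=prediction["boxes"],
                      labels=labels,
                      colors="red",
                      width=4, font_size=30)
    im = to_pil_image(box.detach())
    im.show()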
6 changes: 3 additions & 3 deletions docs/source/transforms.rst
@@ -206,8 +206,8 @@ Miscellaneous
v2.RandomErasing
Lambda
v2.Lambda
-v2.SanitizeBoundingBoxes
-v2.ClampBoundingBoxes
+v2.SanitizeBBoxes
+v2.ClampBBoxes
v2.UniformTemporalSubsample

.. _conversion_transforms:
@@ -236,7 +236,7 @@ Conversion
ConvertImageDtype
v2.ConvertImageDtype
v2.ToDtype
-v2.ConvertBoundingBoxFormat
+v2.ConvertBBoxFormat

Auto-Augmentation
-----------------
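As a rough sketch of how the renamed conversion and sanitization transforms would compose (assuming this PR's BBox* spellings; the released API uses ConvertBoundingBoxFormat and SanitizeBoundingBoxes), mirroring the detection preset later in this diff:

.. code:: python

    import torch
    from torchvision import datapoints
    from torchvision.transforms import v2

    transforms = v2.Compose([
        v2.ConvertBBoxFormat(datapoints.BBoxFormat.XYXY),  # normalize the box format
        v2.SanitizeBBoxes(),                                # drop degenerate boxes and their labels
    ])

    sample = {
        "image": datapoints.Image(torch.rand(3, 32, 32)),
        "boxes": datapoints.BBoxes(
            [[10, 10, 20, 20], [5, 5, 5, 5]],  # the second box is degenerate
            format=datapoints.BBoxFormat.XYXY,
            spatial_size=(32, 32),
        ),
        "labels": torch.tensor([1, 2]),
    }
    out = transforms(sample)  # out["boxes"] and out["labels"] keep only the valid box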
2 changes: 1 addition & 1 deletion docs/source/utils.rst
@@ -12,7 +12,7 @@ visualization <sphx_glr_auto_examples_plot_visualization_utils.py>`.
:toctree: generated/
:template: function.rst

-draw_bounding_boxes
+draw_bboxes
draw_segmentation_masks
draw_keypoints
flow_to_image
4 changes: 2 additions & 2 deletions references/detection/presets.py
@@ -77,8 +77,8 @@ def __init__(

if use_v2:
transforms += [
-T.ConvertBoundingBoxFormat(datapoints.BoundingBoxFormat.XYXY),
-T.SanitizeBoundingBoxes(),
+T.ConvertBBoxFormat(datapoints.BBoxFormat.XYXY),
+T.SanitizeBBoxes(),
]

self.transforms = T.Compose(transforms)
6 changes: 3 additions & 3 deletions test/builtin_dataset_mocks.py
@@ -925,7 +925,7 @@ def _make_attributes_file(cls, root, image_file_names):
cls._make_ann_file(root, "list_attr_celeba.txt", data, field_names=(*field_names, ""))

@classmethod
-def _make_bounding_boxes_file(cls, root, image_file_names):
+def _make_bboxes_file(cls, root, image_file_names):
field_names = ("image_id", "x_1", "y_1", "width", "height")
data = [
[f"{name} ", *[f"{coord:3d}" for coord in make_tensor((4,), low=0, dtype=torch.int).tolist()]]
@@ -960,7 +960,7 @@ def generate(cls, root):
for make_ann_file_fn in (
cls._make_identity_file,
cls._make_attributes_file,
-cls._make_bounding_boxes_file,
+cls._make_bboxes_file,
cls._make_landmarks_file,
):
make_ann_file_fn(root, image_file_names)
@@ -1342,7 +1342,7 @@ def _make_archive(cls, root):
with open(archive_folder / "train_test_split.txt", "w") as file:
file.write("\n".join(f"{image_id} {split_id}" for image_id, split_id in zip(image_ids, split_ids)))

with open(archive_folder / "bounding_boxes.txt", "w") as file:
with open(archive_folder / "bboxes.txt", "w") as file:
file.write(
"\n".join(
" ".join(
48 changes: 23 additions & 25 deletions test/common_utils.py
@@ -620,15 +620,15 @@ def make_image_loaders_for_interpolation(


@dataclasses.dataclass
-class BoundingBoxesLoader(TensorLoader):
-    format: datapoints.BoundingBoxFormat
+class BBoxesLoader(TensorLoader):
+    format: datapoints.BBoxFormat
spatial_size: Tuple[int, int]


-def make_bounding_box(
+def make_bbox(
size=None,
*,
-format=datapoints.BoundingBoxFormat.XYXY,
+format=datapoints.BBoxFormat.XYXY,
spatial_size=None,
batch_dims=(),
dtype=None,
@@ -639,25 +639,25 @@ def make_bounding_box(
- (box[3] - box[1], box[2] - box[0]) for XYXY
- (H, W) for XYWH and CXCYWH
spatial_size: Size of the reference object, e.g. an image. Corresponds to the .spatial_size attribute on
-returned datapoints.BoundingBoxes
+returned datapoints.BBoxes

To generate a valid joint sample, you need to set spatial_size here to the same value as size on the other maker
functions, e.g.

.. code::

image = make_image(size=size)
-bounding_boxes = make_bounding_box(spatial_size=size)
-assert F.get_spatial_size(bounding_boxes) == F.get_spatial_size(image)
+bboxes = make_bbox(spatial_size=size)
+assert F.get_spatial_size(bboxes) == F.get_spatial_size(image)

For convenience, if both size and spatial_size are omitted, spatial_size defaults to the same value as size for all
other maker functions, e.g.

.. code::

image = make_image()
-bounding_boxes = make_bounding_box()
-assert F.get_spatial_size(bounding_boxes) == F.get_spatial_size(image)
+bboxes = make_bbox()
+assert F.get_spatial_size(bboxes) == F.get_spatial_size(image)
"""

def sample_position(values, max_value):
@@ -666,7 +666,7 @@ def sample_position(values, max_value):
return torch.stack([torch.randint(max_value - v, ()) for v in values.flatten().tolist()]).reshape(values.shape)

if isinstance(format, str):
-format = datapoints.BoundingBoxFormat[format]
+format = datapoints.BBoxFormat[format]

if spatial_size is None:
if size is None:
@@ -679,7 +679,7 @@ def sample_position(values, max_value):
dtype = dtype or torch.float32

if any(dim == 0 for dim in batch_dims):
-return datapoints.BoundingBoxes(
+return datapoints.BBoxes(
torch.empty(*batch_dims, 4, dtype=dtype, device=device), format=format, spatial_size=spatial_size
)

@@ -691,28 +691,28 @@ def sample_position(values, max_value):
y = sample_position(h, spatial_size[0])
x = sample_position(w, spatial_size[1])

-if format is datapoints.BoundingBoxFormat.XYWH:
+if format is datapoints.BBoxFormat.XYWH:
parts = (x, y, w, h)
-elif format is datapoints.BoundingBoxFormat.XYXY:
+elif format is datapoints.BBoxFormat.XYXY:
x1, y1 = x, y
x2 = x1 + w
y2 = y1 + h
parts = (x1, y1, x2, y2)
-elif format is datapoints.BoundingBoxFormat.CXCYWH:
+elif format is datapoints.BBoxFormat.CXCYWH:
cx = x + w / 2
cy = y + h / 2
parts = (cx, cy, w, h)
else:
raise ValueError(f"Format {format} is not supported")

-return datapoints.BoundingBoxes(
+return datapoints.BBoxes(
torch.stack(parts, dim=-1).to(dtype=dtype, device=device), format=format, spatial_size=spatial_size
)


-def make_bounding_box_loader(*, extra_dims=(), format, spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE, dtype=torch.float32):
+def make_bbox_loader(*, extra_dims=(), format, spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE, dtype=torch.float32):
if isinstance(format, str):
-format = datapoints.BoundingBoxFormat[format]
+format = datapoints.BBoxFormat[format]

spatial_size = _parse_spatial_size(spatial_size, name="spatial_size")

@@ -721,25 +721,23 @@ def fn(shape, dtype, device):
if num_coordinates != 4:
raise pytest.UsageError()

-return make_bounding_box(
-    format=format, spatial_size=spatial_size, batch_dims=batch_dims, dtype=dtype, device=device
-)
+return make_bbox(format=format, spatial_size=spatial_size, batch_dims=batch_dims, dtype=dtype, device=device)

-return BoundingBoxesLoader(fn, shape=(*extra_dims, 4), dtype=dtype, format=format, spatial_size=spatial_size)
+return BBoxesLoader(fn, shape=(*extra_dims, 4), dtype=dtype, format=format, spatial_size=spatial_size)


-def make_bounding_box_loaders(
+def make_bbox_loaders(
*,
extra_dims=DEFAULT_EXTRA_DIMS,
-formats=tuple(datapoints.BoundingBoxFormat),
+formats=tuple(datapoints.BBoxFormat),
spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE,
dtypes=(torch.float32, torch.float64, torch.int64),
):
for params in combinations_grid(extra_dims=extra_dims, format=formats, dtype=dtypes):
-yield make_bounding_box_loader(**params, spatial_size=spatial_size)
+yield make_bbox_loader(**params, spatial_size=spatial_size)


-make_bounding_boxes = from_loaders(make_bounding_box_loaders)
+make_bboxes = from_loaders(make_bbox_loaders)


class MaskLoader(TensorLoader):
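A short, hypothetical usage sketch of the renamed test helpers, following the joint-sample pattern from the make_bbox docstring above; make_image is the existing image maker in the same module:

.. code:: python

    from torchvision import datapoints
    from torchvision.transforms.v2 import functional as F

    from common_utils import make_bbox, make_image  # helpers renamed/defined in this file

    image = make_image(size=(32, 32))
    boxes = make_bbox(format="XYWH", spatial_size=(32, 32), batch_dims=(5,))

    assert isinstance(boxes, datapoints.BBoxes)
    assert boxes.shape == (5, 4)
    assert boxes.format is datapoints.BBoxFormat.XYWH
    assert F.get_spatial_size(boxes) == F.get_spatial_size(image)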
10 changes: 4 additions & 6 deletions test/test_datapoints.py
@@ -23,15 +23,13 @@ def test_mask_instance(data):


@pytest.mark.parametrize("data", [torch.randint(0, 32, size=(5, 4)), [[0, 0, 5, 5], [2, 2, 7, 7]]])
-@pytest.mark.parametrize(
-    "format", ["XYXY", "CXCYWH", datapoints.BoundingBoxFormat.XYXY, datapoints.BoundingBoxFormat.XYWH]
-)
+@pytest.mark.parametrize("format", ["XYXY", "CXCYWH", datapoints.BBoxFormat.XYXY, datapoints.BBoxFormat.XYWH])
def test_bbox_instance(data, format):
-bboxes = datapoints.BoundingBoxes(data, format=format, spatial_size=(32, 32))
+bboxes = datapoints.BBoxes(data, format=format, spatial_size=(32, 32))
assert isinstance(bboxes, torch.Tensor)
assert bboxes.ndim == 2 and bboxes.shape[1] == 4
if isinstance(format, str):
-format = datapoints.BoundingBoxFormat[(format.upper())]
+format = datapoints.BBoxFormat[(format.upper())]
assert bboxes.format == format


@@ -164,7 +162,7 @@ def test_wrap_like():
[
datapoints.Image(torch.rand(3, 16, 16)),
datapoints.Video(torch.rand(2, 3, 16, 16)),
-datapoints.BoundingBoxes([0.0, 1.0, 2.0, 3.0], format=datapoints.BoundingBoxFormat.XYXY, spatial_size=(10, 10)),
+datapoints.BBoxes([0.0, 1.0, 2.0, 3.0], format=datapoints.BBoxFormat.XYXY, spatial_size=(10, 10)),
datapoints.Mask(torch.randint(0, 256, (16, 16), dtype=torch.uint8)),
],
)