Skip to content

Commit 8fad2ca

Browse files
authored
Use blueprint in arkit_scenes demo, leveraging the viewer's ability to re-project 3D->2D (#5510)
### What

* Part of #3412

Removes a lot of code from the arkit demo and makes it look a little bit nicer (in addition to fixing the 2D reprojections, which looked broken before):

* two tabs for the 2D views: one with depth, one with RGB
* named space views

![image](https://github.com/rerun-io/rerun/assets/1220815/eb7616da-ed6f-45bc-93ca-9453fdf5be17)

### Checklist

* [x] I have read and agree to [Contributor Guide](https://github.com/rerun-io/rerun/blob/main/CONTRIBUTING.md) and the [Code of Conduct](https://github.com/rerun-io/rerun/blob/main/CODE_OF_CONDUCT.md)
* [x] I've included a screenshot or gif (if applicable)
* [x] I have tested the web demo (if applicable):
  * Using newly built examples: [app.rerun.io](https://app.rerun.io/pr/5510/index.html)
  * Using examples from latest `main` build: [app.rerun.io](https://app.rerun.io/pr/5510/index.html?manifest_url=https://app.rerun.io/version/main/examples_manifest.json)
  * Using full set of examples from `nightly` build: [app.rerun.io](https://app.rerun.io/pr/5510/index.html?manifest_url=https://app.rerun.io/version/nightly/examples_manifest.json)
* [x] The PR title and labels are set such as to maximize their usefulness for the next release's CHANGELOG
* [x] If applicable, add a new check to the [release checklist](https://github.com/rerun-io/rerun/blob/main/tests/python/release_checklist)!

- [PR Build Summary](https://build.rerun.io/pr/5510)
- [Docs preview](https://rerun.io/preview/f2544987580d94f0d912eeaa224a98d46745f2a4/docs) <!--DOCS-PREVIEW-->
- [Examples preview](https://rerun.io/preview/f2544987580d94f0d912eeaa224a98d46745f2a4/examples) <!--EXAMPLES-PREVIEW-->
- [Recent benchmark results](https://build.rerun.io/graphs/crates.html)
- [Wasm size tracking](https://build.rerun.io/graphs/sizes.html)
1 parent ded3770 commit 8fad2ca

File tree

5 files changed

+47
-208
lines changed

5 files changed

+47
-208
lines changed

examples/python/arkit_scenes/main.py

Lines changed: 41 additions & 192 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,9 @@
88
from typing import Any, Tuple
99

1010
import cv2
11-
import matplotlib.pyplot as plt
1211
import numpy as np
13-
import numpy.typing as npt
1412
import rerun as rr # pip install rerun-sdk
13+
import rerun.blueprint as rbl
1514
import trimesh
1615
from download_dataset import AVAILABLE_RECORDINGS, ensure_recording_available
1716
from scipy.spatial.transform import Rotation as R
@@ -84,36 +83,28 @@
8483
bounding box is logged as a separate entity to the common [world/annotations](recording://world/annotations) parent.
8584
""".strip()
8685

86+
lowres_posed_entity_path = "world/camera_lowres"
87+
highres_entity_path = "world/camera_highres"
88+
8789

8890
def load_json(js_path: Path) -> dict[str, Any]:
8991
with open(js_path) as f:
9092
json_data: dict[str, Any] = json.load(f)
9193
return json_data
9294

9395

94-
def log_annotated_bboxes(annotation: dict[str, Any]) -> tuple[npt.NDArray[np.float64], list[str], list[Color]]:
96+
def log_annotated_bboxes(annotation: dict[str, Any]) -> None:
9597
"""
9698
Logs annotated oriented bounding boxes to Rerun.
9799
98-
We currently calculate and return the 3D bounding boxes keypoints, labels, and colors for each object to log them in
99-
each camera frame TODO(#3412): once resolved this can be removed.
100-
101100
annotation json file
102101
| |-- label: object name of bounding box
103102
| |-- axesLengths[x, y, z]: size of the origin bounding-box before transforming
104103
| |-- centroid[]: the translation matrix (1,3) of bounding-box
105104
| |-- normalizedAxes[]: the rotation matrix (3,3) of bounding-box
106105
"""
107-
bbox_list = []
108-
bbox_labels = []
109-
num_objects = len(annotation["data"])
110-
# Generate a color per object that can be reused across both 3D obb and their 2D projections
111-
# TODO(#3412, #1728): once resolved this can be removed
112-
color_positions = np.linspace(0, 1, num_objects)
113-
colormap = plt.colormaps["viridis"]
114-
colors = [colormap(pos) for pos in color_positions]
115-
116-
for i, label_info in enumerate(annotation["data"]):
106+
107+
for label_info in annotation["data"]:
117108
uid = label_info["uid"]
118109
label = label_info["label"]
119110

@@ -130,184 +121,25 @@ def log_annotated_bboxes(annotation: dict[str, Any]) -> tuple[npt.NDArray[np.flo
130121
centers=centroid,
131122
rotations=rr.Quaternion(xyzw=rot.as_quat()),
132123
labels=label,
133-
colors=colors[i],
134124
),
135125
timeless=True,
136126
)
137127

138-
box3d = compute_box_3d(half_size, centroid, rotation)
139-
bbox_list.append(box3d)
140-
bbox_labels.append(label)
141-
bboxes_3d = np.array(bbox_list)
142-
return bboxes_3d, bbox_labels, colors
143-
144-
145-
def compute_box_3d(
146-
half_size: npt.NDArray[np.float64], transform: npt.NDArray[np.float64], rotation: npt.NDArray[np.float64]
147-
) -> npt.NDArray[np.float64]:
148-
"""
149-
Given obb compute 3D keypoints of the box.
150-
151-
TODO(#3412): once resolved this can be removed
152-
"""
153-
length, height, width = half_size.tolist()
154-
center = np.reshape(transform, (-1, 3))
155-
center = center.reshape(3)
156-
x_corners = [length, length, -length, -length, length, length, -length, -length]
157-
y_corners = [height, -height, -height, height, height, -height, -height, height]
158-
z_corners = [width, width, width, width, -width, -width, -width, -width]
159-
corners_3d = np.dot(np.transpose(rotation), np.vstack([x_corners, y_corners, z_corners]))
160-
161-
corners_3d[0, :] += center[0]
162-
corners_3d[1, :] += center[1]
163-
corners_3d[2, :] += center[2]
164-
bbox3d_raw = np.transpose(corners_3d)
165-
return bbox3d_raw
166-
167-
168-
def log_line_segments(entity_path: str, bboxes_2d_filtered: npt.NDArray[np.float64], color: Color, label: str) -> None:
169-
"""
170-
Generates line segments for each object's bounding box in 2D.
171-
172-
Box corner order that we return is of the format below:
173-
6 -------- 7
174-
/| /|
175-
5 -------- 4 .
176-
| | | |
177-
. 2 -------- 3
178-
|/ |/
179-
1 -------- 0
180-
181-
TODO(#3412): once resolved this can be removed
182-
183-
:param bboxes_2d_filtered:
184-
A numpy array of shape (8, 2), representing the filtered 2D keypoints of the 3D bounding boxes.
185-
:return: A numpy array of shape (24, 2), representing the line segments for each object's bounding boxes.
186-
Even and odd indices represent the start and end points of each line segment respectively.
187-
"""
188-
189-
# Calculate the centroid of the 2D keypoints
190-
valid_points = bboxes_2d_filtered[~np.isnan(bboxes_2d_filtered).any(axis=1)]
191-
192-
# log centroid and add label so that object label is visible in the 2D view
193-
if valid_points.size > 0:
194-
centroid = valid_points.mean(axis=0)
195-
rr.log(f"{entity_path}/centroid", rr.Points2D(centroid, colors=color, labels=label))
196-
else:
197-
pass
198-
199-
segments = [
200-
# bottom of bbox
201-
[bboxes_2d_filtered[0], bboxes_2d_filtered[1]],
202-
[bboxes_2d_filtered[1], bboxes_2d_filtered[2]],
203-
[bboxes_2d_filtered[2], bboxes_2d_filtered[3]],
204-
[bboxes_2d_filtered[3], bboxes_2d_filtered[0]],
205-
# top of bbox
206-
[bboxes_2d_filtered[4], bboxes_2d_filtered[5]],
207-
[bboxes_2d_filtered[5], bboxes_2d_filtered[6]],
208-
[bboxes_2d_filtered[6], bboxes_2d_filtered[7]],
209-
[bboxes_2d_filtered[7], bboxes_2d_filtered[4]],
210-
# sides of bbox
211-
[bboxes_2d_filtered[0], bboxes_2d_filtered[4]],
212-
[bboxes_2d_filtered[1], bboxes_2d_filtered[5]],
213-
[bboxes_2d_filtered[2], bboxes_2d_filtered[6]],
214-
[bboxes_2d_filtered[3], bboxes_2d_filtered[7]],
215-
]
216-
217-
rr.log(entity_path, rr.LineStrips2D(segments, colors=color))
218-
219-
220-
def project_3d_bboxes_to_2d_keypoints(
221-
bboxes_3d: npt.NDArray[np.float64],
222-
camera_from_world: rr.TranslationRotationScale3D,
223-
intrinsic: npt.NDArray[np.float64],
224-
img_width: int,
225-
img_height: int,
226-
) -> npt.NDArray[np.float64]:
227-
"""
228-
Returns 2D keypoints of the 3D bounding box in the camera view.
229-
230-
TODO(#3412): once resolved this can be removed
231-
Args:
232-
bboxes_3d: (nObjects, 8, 3) containing the 3D bounding box keypoints in world frame.
233-
camera_from_world: Tuple containing the camera translation and rotation_quaternion in world frame.
234-
intrinsic: (3,3) containing the camera intrinsic matrix.
235-
img_width: Width of the image.
236-
img_height: Height of the image.
237-
238-
Returns
239-
-------
240-
bboxes_2d_filtered:
241-
A numpy array of shape (nObjects, 8, 2), representing the 2D keypoints of the 3D bounding boxes. That
242-
are within the image frame.
243-
244-
"""
245-
246-
translation, rotation_q = camera_from_world.translation, camera_from_world.rotation
247-
# We know we stored the rotation as a quaternion, so extract it again.
248-
# TODO(#3467): This shouldn't directly access rotation.inner
249-
rotation = R.from_quat(rotation_q.inner) # type: ignore[union-attr]
250-
251-
# Transform 3D keypoints from world to camera frame
252-
world_to_camera_rotation = rotation.as_matrix()
253-
world_to_camera_translation = np.array(translation).reshape(3, 1)
254-
# Tile translation to match bounding box shape, (nObjects, 1, 3)
255-
world_to_camera_translation_tiled = np.tile(world_to_camera_translation.T, (bboxes_3d.shape[0], 1, 1))
256-
# Transform 3D bounding box keypoints from world to camera frame to filter out points behind the camera
257-
camera_points = (
258-
np.einsum("ij,afj->afi", world_to_camera_rotation, bboxes_3d[..., :3]) + world_to_camera_translation_tiled
259-
)
260-
# Check if the points are in front of the camera
261-
depth_mask = camera_points[..., 2] > 0
262-
# convert to transformation matrix shape of (3, 4)
263-
world_to_camera = np.hstack([world_to_camera_rotation, world_to_camera_translation])
264-
transformation_matrix = intrinsic @ world_to_camera
265-
# add batch dimension to match bounding box shape, (nObjects, 3, 4)
266-
transformation_matrix = np.tile(transformation_matrix, (bboxes_3d.shape[0], 1, 1))
267-
# bboxes_3d: [nObjects, 8, 3] -> [nObjects, 8, 4] to allow for batch projection
268-
bboxes_3d = np.concatenate([bboxes_3d, np.ones((bboxes_3d.shape[0], bboxes_3d.shape[1], 1))], axis=-1)
269-
# Apply depth mask to filter out points behind the camera
270-
bboxes_3d[~depth_mask] = np.nan
271-
# batch projection of points using einsum
272-
bboxes_2d = np.einsum("vab,fnb->vfna", transformation_matrix, bboxes_3d)
273-
bboxes_2d = bboxes_2d[..., :2] / bboxes_2d[..., 2:]
274-
# nViews irrelevant, squeeze out
275-
bboxes_2d = bboxes_2d[0]
276-
277-
# Filter out keypoints that are not within the frame
278-
mask_x = (bboxes_2d[:, :, 0] >= 0) & (bboxes_2d[:, :, 0] < img_width)
279-
mask_y = (bboxes_2d[:, :, 1] >= 0) & (bboxes_2d[:, :, 1] < img_height)
280-
mask = mask_x & mask_y
281-
bboxes_2d_filtered = np.where(mask[..., np.newaxis], bboxes_2d, np.nan)
282-
283-
return bboxes_2d_filtered
284-
285128

286129
def log_camera(
287130
intri_path: Path,
288131
frame_id: str,
289132
poses_from_traj: dict[str, rr.TranslationRotationScale3D],
290133
entity_id: str,
291-
bboxes: npt.NDArray[np.float64],
292-
bbox_labels: list[str],
293-
colors: list[Color],
294134
) -> None:
295135
"""Logs camera transform and 3D bounding boxes in the image frame."""
296136
w, h, fx, fy, cx, cy = np.loadtxt(intri_path)
297137
intrinsic = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]])
298138
camera_from_world = poses_from_traj[frame_id]
299139

300-
# TODO(#3412): once resolved this can be removed
301-
# Project 3D bounding boxes into 2D image
302-
bboxes_2d = project_3d_bboxes_to_2d_keypoints(bboxes, camera_from_world, intrinsic, img_width=w, img_height=h)
303-
304140
# clear previous centroid labels
305141
rr.log(f"{entity_id}/bbox-2D-segments", rr.Clear(recursive=True))
306142

307-
# Log line segments for each bounding box in the image
308-
for i, (label, bbox_2d) in enumerate(zip(bbox_labels, bboxes_2d)):
309-
log_line_segments(f"{entity_id}/bbox-2D-segments/{label}", bbox_2d.reshape(-1, 2), colors[i], label)
310-
311143
# pathlib makes it easy to get the parent, but log methods requires a string
312144
rr.log(entity_id, rr.Transform3D(transform=camera_from_world))
313145
rr.log(entity_id, rr.Pinhole(image_from_camera=intrinsic, resolution=[w, h]))
@@ -430,10 +262,7 @@ def log_arkit(recording_path: Path, include_highres: bool) -> None:
430262
# load the obb annotations and log them in the world frame
431263
bbox_annotations_path = recording_path / f"{recording_path.stem}_3dod_annotation.json"
432264
annotation = load_json(bbox_annotations_path)
433-
bboxes_3d, bbox_labels, colors_list = log_annotated_bboxes(annotation)
434-
435-
lowres_posed_entity_id = "world/camera_lowres"
436-
highres_entity_id = "world/camera_highres"
265+
log_annotated_bboxes(annotation)
437266

438267
print("Processing frames…")
439268
for frame_timestamp in tqdm(lowres_frame_ids):
@@ -453,14 +282,11 @@ def log_arkit(recording_path: Path, include_highres: bool) -> None:
453282
lowres_intri_path,
454283
frame_timestamp,
455284
camera_from_world_dict,
456-
lowres_posed_entity_id,
457-
bboxes_3d,
458-
bbox_labels,
459-
colors_list,
285+
lowres_posed_entity_path,
460286
)
461287

462-
rr.log(f"{lowres_posed_entity_id}/rgb", rr.Image(rgb).compress(jpeg_quality=95))
463-
rr.log(f"{lowres_posed_entity_id}/depth", rr.DepthImage(depth, meter=1000))
288+
rr.log(f"{lowres_posed_entity_path}/rgb", rr.Image(rgb).compress(jpeg_quality=95))
289+
rr.log(f"{lowres_posed_entity_path}/depth", rr.DepthImage(depth, meter=1000))
464290

465291
# log the high res camera
466292
if high_res_exists:
@@ -472,10 +298,7 @@ def log_arkit(recording_path: Path, include_highres: bool) -> None:
472298
highres_intri_path,
473299
closest_lowres_frame_id,
474300
camera_from_world_dict,
475-
highres_entity_id,
476-
bboxes_3d,
477-
bbox_labels,
478-
colors_list,
301+
highres_entity_path,
479302
)
480303

481304
# load the highres image and depth if they exist
@@ -484,8 +307,8 @@ def log_arkit(recording_path: Path, include_highres: bool) -> None:
484307

485308
highres_rgb = cv2.cvtColor(highres_bgr, cv2.COLOR_BGR2RGB)
486309

487-
rr.log(f"{highres_entity_id}/rgb", rr.Image(highres_rgb).compress(jpeg_quality=75))
488-
rr.log(f"{highres_entity_id}/depth", rr.DepthImage(highres_depth, meter=1000))
310+
rr.log(f"{highres_entity_path}/rgb", rr.Image(highres_rgb).compress(jpeg_quality=75))
311+
rr.log(f"{highres_entity_path}/depth", rr.DepthImage(highres_depth, meter=1000))
489312

490313

491314
def main() -> None:
@@ -505,7 +328,33 @@ def main() -> None:
505328
rr.script_add_args(parser)
506329
args = parser.parse_args()
507330

508-
rr.script_setup(args, "rerun_example_arkit_scenes")
331+
primary_camera_entity = highres_entity_path if args.include_highres else lowres_posed_entity_path
332+
333+
rr.script_setup(
334+
args,
335+
"rerun_example_arkit_scenes",
336+
blueprint=rbl.Horizontal(
337+
rbl.Spatial3DView(name="3D"),
338+
rbl.Vertical(
339+
rbl.Tabs(
340+
# Note that we re-project the annotations into the 2D views:
341+
# For this to work, the origin of the 2D views has to be a pinhole camera,
342+
# this way the viewer knows how to project the 3D annotations into the 2D views.
343+
rbl.Spatial2DView(
344+
name="RGB",
345+
origin=primary_camera_entity,
346+
contents=[f"{primary_camera_entity}/rgb", "/world/annotations/**"],
347+
),
348+
rbl.Spatial2DView(
349+
name="Depth",
350+
origin=primary_camera_entity,
351+
contents=[f"{primary_camera_entity}/depth", "/world/annotations/**"],
352+
),
353+
),
354+
rbl.TextDocumentView(name="Readme"),
355+
),
356+
),
357+
)
509358
recording_path = ensure_recording_available(args.video_id, args.include_highres)
510359
log_arkit(recording_path, args.include_highres)
511360

pixi.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,13 +77,14 @@ lint-rs-all = "cargo fmt --check"
7777
lint-py-fmt-check = "ruff format --check --config rerun_py/pyproject.toml"
7878
lint-py-blackdoc = "blackdoc --check"
7979
lint-py-mypy = "mypy --install-types --non-interactive --no-warn-unused-ignore"
80-
lint-py-ruff = "ruff check --config rerun_py/pyproject.toml"
80+
lint-py-ruff = "ruff format --check --config rerun_py/pyproject.toml"
8181
lint-taplo = "taplo fmt --check --diff"
8282
lint-typos = "typos"
8383

8484
misc-fmt = "prettier --write '**/*.{yml,yaml,js,css,html}'"
8585
misc-fmt-check = "prettier --check '**/*.{yml,yaml,js,css,html}'"
8686
toml-fmt = "taplo fmt"
87+
ruff-fmt = "ruff format --config rerun_py/pyproject.toml ."
8788
ruff-fix = "ruff --fix --config rerun_py/pyproject.toml ."
8889

8990
py-build = "maturin develop --manifest-path rerun_py/Cargo.toml --extras=tests"

rerun_py/rerun_sdk/rerun/blueprint/.gitattributes

Lines changed: 0 additions & 11 deletions
This file was deleted.

rerun_py/rerun_sdk/rerun/script_helpers.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ def script_setup(
6565
args: Namespace,
6666
application_id: str,
6767
recording_id: str | UUID | None = None,
68-
blueprint: rr.blueprint.Blueprint | None = None,
68+
blueprint: rr.blueprint.BlueprintLike | None = None,
6969
) -> RecordingStream:
7070
"""
7171
Run common Rerun script setup actions. Connect to the viewer if necessary.
@@ -87,7 +87,7 @@ def script_setup(
8787
processes to log to the same Rerun instance (and be part of the same recording),
8888
you will need to manually assign them all the same recording_id.
8989
Any random UUIDv4 will work, or copy the recording id for the parent process.
90-
blueprint : Optional[rr.blueprint.Blueprint]
90+
blueprint : Optional[rr.blueprint.BlueprintLike]
9191
An optional blueprint to use for the viewer.
9292
9393
"""

rerun_py/tests/unit/test_container_blueprint.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from __future__ import annotations
22

33
import itertools
4-
from typing import Optional, cast
4+
from typing import Any, Optional, Sequence, cast
55

66
from rerun.blueprint.archetypes.container_blueprint import ContainerBlueprint
77
from rerun.blueprint.components.active_tab import ActiveTab, ActiveTabBatch
@@ -30,7 +30,7 @@ def test_container_blueprint() -> None:
3030
"my container",
3131
]
3232

33-
contents_arrays = [
33+
contents_arrays: Sequence[Any] = [
3434
None,
3535
[],
3636
["space_view/1234", "container/5678"],

0 commit comments

Comments
 (0)