88from typing import Any , Tuple
99
1010import cv2
11- import matplotlib .pyplot as plt
1211import numpy as np
13- import numpy .typing as npt
1412import rerun as rr # pip install rerun-sdk
13+ import rerun .blueprint as rbl
1514import trimesh
1615from download_dataset import AVAILABLE_RECORDINGS , ensure_recording_available
1716from scipy .spatial .transform import Rotation as R
8483bounding box is logged as a separate entity to the common [world/annotations](recording://world/annotations) parent.
8584""" .strip ()
8685
86+ lowres_posed_entity_path = "world/camera_lowres"
87+ highres_entity_path = "world/camera_highres"
88+
8789
def load_json(js_path: Path) -> dict[str, Any]:
    """
    Read the JSON document stored at `js_path` and return the parsed result.

    The file is always decoded as UTF-8 rather than with the platform's locale encoding, so non-ASCII text
    (e.g. object labels) parses identically on every operating system.

    Args:
        js_path: Path of the JSON file to read.

    Returns
    -------
    json_data:
        Whatever `json.load` produced for the file; callers index it like a dictionary.

    """
    # JSON interchange is defined as UTF-8; `open()` without an encoding would use the locale default.
    with open(js_path, encoding="utf-8") as f:
        json_data: dict[str, Any] = json.load(f)
    return json_data
9294
9395
94- def log_annotated_bboxes (annotation : dict [str , Any ]) -> tuple [ npt . NDArray [ np . float64 ], list [ str ], list [ Color ]] :
96+ def log_annotated_bboxes (annotation : dict [str , Any ]) -> None :
9597 """
9698 Logs annotated oriented bounding boxes to Rerun.
9799
98- We currently calculate and return the 3D bounding boxes keypoints, labels, and colors for each object to log them in
99- each camera frame TODO(#3412): once resolved this can be removed.
100-
101100 annotation json file
102101 | |-- label: object name of bounding box
103102 | |-- axesLengths[x, y, z]: size of the origin bounding-box before transforming
104103 | |-- centroid[]: the translation matrix (1,3) of bounding-box
105104 | |-- normalizedAxes[]: the rotation matrix (3,3) of bounding-box
106105 """
107- bbox_list = []
108- bbox_labels = []
109- num_objects = len (annotation ["data" ])
110- # Generate a color per object that can be reused across both 3D obb and their 2D projections
111- # TODO(#3412, #1728): once resolved this can be removed
112- color_positions = np .linspace (0 , 1 , num_objects )
113- colormap = plt .colormaps ["viridis" ]
114- colors = [colormap (pos ) for pos in color_positions ]
115-
116- for i , label_info in enumerate (annotation ["data" ]):
106+
107+ for label_info in annotation ["data" ]:
117108 uid = label_info ["uid" ]
118109 label = label_info ["label" ]
119110
@@ -130,184 +121,25 @@ def log_annotated_bboxes(annotation: dict[str, Any]) -> tuple[npt.NDArray[np.flo
130121 centers = centroid ,
131122 rotations = rr .Quaternion (xyzw = rot .as_quat ()),
132123 labels = label ,
133- colors = colors [i ],
134124 ),
135125 timeless = True ,
136126 )
137127
138- box3d = compute_box_3d (half_size , centroid , rotation )
139- bbox_list .append (box3d )
140- bbox_labels .append (label )
141- bboxes_3d = np .array (bbox_list )
142- return bboxes_3d , bbox_labels , colors
143-
144-
def compute_box_3d(
    half_size: npt.NDArray[np.float64], transform: npt.NDArray[np.float64], rotation: npt.NDArray[np.float64]
) -> npt.NDArray[np.float64]:
    """
    Given an oriented bounding box, compute the 3D keypoints (corners) of the box.

    TODO(#3412): once resolved this can be removed

    Args:
        half_size: The three half-extents of the box along its local x, y and z axes.
        transform: The box centre; any array-like holding exactly three values (e.g. shape (3,) or (1, 3)).
        rotation: (3, 3) matrix. Its transpose is applied to the local corner offsets, i.e. every corner is
            computed as `offset @ rotation + centre`.

    Returns
    -------
    corners:
        (8, 3) float64 array with one corner per row. Rows 0-3 have a positive local z offset and rows 4-7 a
        negative one; within each group the (x, y) signs cycle through (+, +), (+, -), (-, -), (-, +).

    """
    # Coerce everything to float64 up front: integer-typed inputs would otherwise yield an integer corner array
    # that cannot absorb a fractional centre, and plain sequences have no `.tolist()`.
    extents = np.asarray(half_size, dtype=np.float64).reshape(3)
    center = np.asarray(transform, dtype=np.float64).reshape(3)
    rotation_matrix = np.asarray(rotation, dtype=np.float64)

    # Sign of each corner's offset along the local axes; the row order is the corner numbering.
    corner_signs = np.array(
        [
            [1, 1, 1],
            [1, -1, 1],
            [-1, -1, 1],
            [-1, 1, 1],
            [1, 1, -1],
            [1, -1, -1],
            [-1, -1, -1],
            [-1, 1, -1],
        ],
        dtype=np.float64,
    )
    local_corners = corner_signs * extents

    # (rotation.T @ local_corners.T).T == local_corners @ rotation, then shift every row by the centre.
    return local_corners @ rotation_matrix + center
166-
167-
def log_line_segments(entity_path: str, bboxes_2d_filtered: npt.NDArray[np.float64], color: Color, label: str) -> None:
    """
    Logs the 2D wireframe of one projected bounding box, plus a labelled centroid point.

    Expected corner order of `bboxes_2d_filtered`:
        6 -------- 7
       /|         /|
      5 -------- 4 .
      | |        | |
      . 2 -------- 3
      |/         |/
      1 -------- 0

    TODO(#3412): once resolved this can be removed

    Nothing is returned; the result is two log calls:
      * `{entity_path}/centroid`: a single `rr.Points2D` at the mean of all corners that contain no NaN, carrying
        `label` so the object name is visible in the 2D view. Skipped when every corner contains a NaN.
      * `entity_path`: an `rr.LineStrips2D` with the 12 box edges as two-point strips. Corners containing NaN are
        passed through unchanged.

    :param entity_path: Entity path the line strips are logged to.
    :param bboxes_2d_filtered:
        A numpy array of shape (8, 2), representing the filtered 2D keypoints of the 3D bounding box.
    :param color: Color applied to both the centroid point and the line strips.
    :param label: Label attached to the centroid point.
    """
    # Corner index pairs making up the 12 edges of the box.
    edges = (
        # bottom of bbox
        (0, 1),
        (1, 2),
        (2, 3),
        (3, 0),
        # top of bbox
        (4, 5),
        (5, 6),
        (6, 7),
        (7, 4),
        # sides of bbox
        (0, 4),
        (1, 5),
        (2, 6),
        (3, 7),
    )

    # Only corners without NaN coordinates take part in the centroid.
    valid_points = bboxes_2d_filtered[~np.isnan(bboxes_2d_filtered).any(axis=1)]

    # log centroid and add label so that object label is visible in the 2D view
    if valid_points.size > 0:
        centroid = valid_points.mean(axis=0)
        rr.log(f"{entity_path}/centroid", rr.Points2D(centroid, colors=color, labels=label))

    segments = [[bboxes_2d_filtered[start], bboxes_2d_filtered[end]] for start, end in edges]
    rr.log(entity_path, rr.LineStrips2D(segments, colors=color))
218-
219-
def project_3d_bboxes_to_2d_keypoints(
    bboxes_3d: npt.NDArray[np.float64],
    camera_from_world: rr.TranslationRotationScale3D,
    intrinsic: npt.NDArray[np.float64],
    img_width: int,
    img_height: int,
) -> npt.NDArray[np.float64]:
    """
    Returns 2D keypoints of the 3D bounding boxes in the camera view.

    Every keypoint is moved into the camera frame with `camera_from_world`, projected with `intrinsic` and then
    replaced by NaN if it lies behind the camera (depth <= 0) or outside the image rectangle.

    TODO(#3412): once resolved this can be removed

    Args:
        bboxes_3d: (nObjects, 8, 3) containing the 3D bounding box keypoints in world frame. An empty array
            (no objects) is accepted. The input is not modified.
        camera_from_world: Transform whose `translation` and quaternion `rotation` map world points into the
            camera frame.
        intrinsic: (3,3) containing the camera intrinsic matrix.
        img_width: Width of the image.
        img_height: Height of the image.

    Returns
    -------
    bboxes_2d_filtered:
        A numpy array of shape (nObjects, 8, 2), representing the 2D keypoints of the 3D bounding boxes. Keypoints
        that are not visible in the image frame are NaN. With no objects the result has shape (0, 8, 2).

    """
    bboxes_3d = np.asarray(bboxes_3d, dtype=np.float64)
    # No annotations: `np.array([])` is 1-D, so there is nothing to project and nothing to index into.
    if bboxes_3d.size == 0:
        return np.empty((0, 8, 2), dtype=np.float64)

    translation, rotation_q = camera_from_world.translation, camera_from_world.rotation
    # We know we stored the rotation as a quaternion, so extract it again.
    # TODO(#3467): This shouldn't directly access rotation.inner
    rotation = R.from_quat(rotation_q.inner)  # type: ignore[union-attr]

    world_to_camera_rotation = rotation.as_matrix()
    world_to_camera_translation = np.asarray(translation, dtype=np.float64).reshape(3)

    # Transform 3D keypoints from world to camera frame; broadcasting handles all objects in one matmul.
    camera_points = bboxes_3d[..., :3] @ world_to_camera_rotation.T + world_to_camera_translation

    # Points at or behind the camera plane cannot be projected meaningfully: blank them out before dividing.
    behind_camera = ~(camera_points[..., 2] > 0)
    camera_points[behind_camera] = np.nan

    # Pinhole projection followed by the perspective divide.
    image_points = camera_points @ np.asarray(intrinsic, dtype=np.float64).T
    bboxes_2d = image_points[..., :2] / image_points[..., 2:]

    # Filter out keypoints that are not within the frame (comparisons with NaN are False, so NaN stays NaN).
    inside_x = (bboxes_2d[..., 0] >= 0) & (bboxes_2d[..., 0] < img_width)
    inside_y = (bboxes_2d[..., 1] >= 0) & (bboxes_2d[..., 1] < img_height)
    bboxes_2d_filtered = np.where((inside_x & inside_y)[..., np.newaxis], bboxes_2d, np.nan)

    return bboxes_2d_filtered
284-
285128
286129def log_camera (
287130 intri_path : Path ,
288131 frame_id : str ,
289132 poses_from_traj : dict [str , rr .TranslationRotationScale3D ],
290133 entity_id : str ,
291- bboxes : npt .NDArray [np .float64 ],
292- bbox_labels : list [str ],
293- colors : list [Color ],
294134) -> None :
295135 """Logs camera transform and 3D bounding boxes in the image frame."""
296136 w , h , fx , fy , cx , cy = np .loadtxt (intri_path )
297137 intrinsic = np .array ([[fx , 0 , cx ], [0 , fy , cy ], [0 , 0 , 1 ]])
298138 camera_from_world = poses_from_traj [frame_id ]
299139
300- # TODO(#3412): once resolved this can be removed
301- # Project 3D bounding boxes into 2D image
302- bboxes_2d = project_3d_bboxes_to_2d_keypoints (bboxes , camera_from_world , intrinsic , img_width = w , img_height = h )
303-
304140 # clear previous centroid labels
305141 rr .log (f"{ entity_id } /bbox-2D-segments" , rr .Clear (recursive = True ))
306142
307- # Log line segments for each bounding box in the image
308- for i , (label , bbox_2d ) in enumerate (zip (bbox_labels , bboxes_2d )):
309- log_line_segments (f"{ entity_id } /bbox-2D-segments/{ label } " , bbox_2d .reshape (- 1 , 2 ), colors [i ], label )
310-
311143 # pathlib makes it easy to get the parent, but log methods requires a string
312144 rr .log (entity_id , rr .Transform3D (transform = camera_from_world ))
313145 rr .log (entity_id , rr .Pinhole (image_from_camera = intrinsic , resolution = [w , h ]))
@@ -430,10 +262,7 @@ def log_arkit(recording_path: Path, include_highres: bool) -> None:
430262 # load the obb annotations and log them in the world frame
431263 bbox_annotations_path = recording_path / f"{ recording_path .stem } _3dod_annotation.json"
432264 annotation = load_json (bbox_annotations_path )
433- bboxes_3d , bbox_labels , colors_list = log_annotated_bboxes (annotation )
434-
435- lowres_posed_entity_id = "world/camera_lowres"
436- highres_entity_id = "world/camera_highres"
265+ log_annotated_bboxes (annotation )
437266
438267 print ("Processing frames…" )
439268 for frame_timestamp in tqdm (lowres_frame_ids ):
@@ -453,14 +282,11 @@ def log_arkit(recording_path: Path, include_highres: bool) -> None:
453282 lowres_intri_path ,
454283 frame_timestamp ,
455284 camera_from_world_dict ,
456- lowres_posed_entity_id ,
457- bboxes_3d ,
458- bbox_labels ,
459- colors_list ,
285+ lowres_posed_entity_path ,
460286 )
461287
462- rr .log (f"{ lowres_posed_entity_id } /rgb" , rr .Image (rgb ).compress (jpeg_quality = 95 ))
463- rr .log (f"{ lowres_posed_entity_id } /depth" , rr .DepthImage (depth , meter = 1000 ))
288+ rr .log (f"{ lowres_posed_entity_path } /rgb" , rr .Image (rgb ).compress (jpeg_quality = 95 ))
289+ rr .log (f"{ lowres_posed_entity_path } /depth" , rr .DepthImage (depth , meter = 1000 ))
464290
465291 # log the high res camera
466292 if high_res_exists :
@@ -472,10 +298,7 @@ def log_arkit(recording_path: Path, include_highres: bool) -> None:
472298 highres_intri_path ,
473299 closest_lowres_frame_id ,
474300 camera_from_world_dict ,
475- highres_entity_id ,
476- bboxes_3d ,
477- bbox_labels ,
478- colors_list ,
301+ highres_entity_path ,
479302 )
480303
481304 # load the highres image and depth if they exist
@@ -484,8 +307,8 @@ def log_arkit(recording_path: Path, include_highres: bool) -> None:
484307
485308 highres_rgb = cv2 .cvtColor (highres_bgr , cv2 .COLOR_BGR2RGB )
486309
487- rr .log (f"{ highres_entity_id } /rgb" , rr .Image (highres_rgb ).compress (jpeg_quality = 75 ))
488- rr .log (f"{ highres_entity_id } /depth" , rr .DepthImage (highres_depth , meter = 1000 ))
310+ rr .log (f"{ highres_entity_path } /rgb" , rr .Image (highres_rgb ).compress (jpeg_quality = 75 ))
311+ rr .log (f"{ highres_entity_path } /depth" , rr .DepthImage (highres_depth , meter = 1000 ))
489312
490313
491314def main () -> None :
@@ -505,7 +328,33 @@ def main() -> None:
505328 rr .script_add_args (parser )
506329 args = parser .parse_args ()
507330
508- rr .script_setup (args , "rerun_example_arkit_scenes" )
331+ primary_camera_entity = highres_entity_path if args .include_highres else lowres_posed_entity_path
332+
333+ rr .script_setup (
334+ args ,
335+ "rerun_example_arkit_scenes" ,
336+ blueprint = rbl .Horizontal (
337+ rbl .Spatial3DView (name = "3D" ),
338+ rbl .Vertical (
339+ rbl .Tabs (
340+ # Note that we re-project the annotations into the 2D views:
341+ # For this to work, the origin of the 2D views has to be a pinhole camera,
342+ # this way the viewer knows how to project the 3D annotations into the 2D views.
343+ rbl .Spatial2DView (
344+ name = "RGB" ,
345+ origin = primary_camera_entity ,
346+ contents = [f"{ primary_camera_entity } /rgb" , "/world/annotations/**" ],
347+ ),
348+ rbl .Spatial2DView (
349+ name = "Depth" ,
350+ origin = primary_camera_entity ,
351+ contents = [f"{ primary_camera_entity } /depth" , "/world/annotations/**" ],
352+ ),
353+ ),
354+ rbl .TextDocumentView (name = "Readme" ),
355+ ),
356+ ),
357+ )
509358 recording_path = ensure_recording_available (args .video_id , args .include_highres )
510359 log_arkit (recording_path , args .include_highres )
511360
0 commit comments