[Fix] Use opencv backend in Webcam API visualization (#2089)

Ben-Louis · web-flow · commit 3af7fed7d5d1 · 2023-03-20T17:28:57.000+08:00
diff --git a/demo/docs/webcam_api_demo.md b/demo/docs/webcam_api_demo.md
@@ -12,17 +12,17 @@ Launch the demo from the mmpose root directory:
 
 ```shell
 # Run webcam demo with GPU
-python demo/webcam_demo.py
+python demo/webcam_api_demo.py
 
 # Run webcam demo with CPU
-python demo/webcam_demo.py --cpu
+python demo/webcam_api_demo.py --cpu
 ```
 
 The command above will use the default config file `demo/webcam_cfg/pose_estimation.py`. You can also specify the config file in the command:
 
 ```shell
 # Use the config "pose_tracking.py" for higher infererence speed
-python demo/webcam_demo.py --config demo/webcam_cfg/pose_estimation.py
+python demo/webcam_api_demo.py --config demo/webcam_cfg/pose_estimation.py
 ```
 
 ### Hotkeys
@@ -36,7 +36,7 @@ python demo/webcam_demo.py --config demo/webcam_cfg/pose_estimation.py
 | m      | Show the monitoring information.      |
 | q      | Exit.                                 |
 
-Note that the demo will automatically save the output video into a file `webcam_demo.mp4`.
+Note that the demo will automatically save the output video into a file `webcam_api_demo.mp4`.
 
 ### Usage and configuarations
 
@@ -103,5 +103,5 @@ Detailed configurations can be found in the config file.
   Run follow command for a quick test of video capturing and displaying.
 
   ```shell
-  python demo/webcam_demo.py --config demo/webcam_cfg/test_camera.py
+  python demo/webcam_api_demo.py --config demo/webcam_cfg/test_camera.py
   ```
diff --git a/demo/webcam_api_demo.py b/demo/webcam_api_demo.py
@@ -1,6 +1,7 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 
 import logging
+import warnings
 from argparse import ArgumentParser
 
 from mmengine import Config, DictAction
@@ -51,6 +52,10 @@ def set_device(cfg: Config, device: str):
 
 
 def run():
+
+    warnings.warn('The Webcam API will be deprecated in future. ',
+                  DeprecationWarning)
+
     args = parse_args()
     cfg = Config.fromfile(args.config)
     cfg.merge_from_dict(args.cfg_options)
diff --git a/demo/webcam_cfg/pose_estimation.py b/demo/webcam_cfg/pose_estimation.py
@@ -129,7 +129,7 @@
         dict(
             type='RecorderNode',
             name='recorder',
-            out_video_file='webcam_demo.mp4',
+            out_video_file='webcam_api_demo.mp4',
             input_buffer='display',
             output_buffer='_display_'
             # `_display_` is an executor-reserved buffer
diff --git a/demo/webcam_cfg/test_camera.py b/demo/webcam_cfg/test_camera.py
@@ -16,7 +16,7 @@
         dict(
             type='RecorderNode',
             name='recorder',
-            out_video_file='webcam_output.mp4',
+            out_video_file='webcam_api_output.mp4',
             input_buffer='display',
             output_buffer='_display_')
     ])
diff --git a/mmpose/apis/webcam/nodes/visualizer_nodes/object_visualizer_node.py b/mmpose/apis/webcam/nodes/visualizer_nodes/object_visualizer_node.py
@@ -1,17 +1,190 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from collections import defaultdict
+import math
+from itertools import groupby
 from typing import Dict, List, Optional, Tuple, Union
 
+import cv2
+import mmcv
 import numpy as np
-from mmengine.structures import InstanceData
 
-from mmpose.structures import PoseDataSample
-from mmpose.visualization import PoseLocalVisualizer
 from ...utils import FrameMessage
 from ..base_visualizer_node import BaseVisualizerNode
 from ..registry import NODES
 
 
+def imshow_bboxes(img,
+                  bboxes,
+                  labels=None,
+                  colors='green',
+                  text_color='white',
+                  thickness=1,
+                  font_scale=0.5):
+    """Draw bboxes with labels (optional) on an image. This is a wrapper of
+    mmcv.imshow_bboxes.
+
+    Args:
+        img (str or ndarray): The image to be displayed.
+        bboxes (ndarray): ndarray of shape (k, 4), each row is a bbox in
+            format [x1, y1, x2, y2].
+        labels (str or list[str], optional): labels of each bbox.
+        colors (str or tuple or :obj:`Color` or list): Color(s) of bboxes.
+        text_color (str or tuple or :obj:`Color`): Color of texts.
+        thickness (int): Thickness of lines.
+        font_scale (float): Font scales of texts.
+
+    Returns:
+        ndarray: The image with bboxes drawn on it.
+    """
+
+    # adapt to mmcv.imshow_bboxes input format
+    bboxes = np.split(
+        bboxes, bboxes.shape[0], axis=0) if bboxes.shape[0] > 0 else []
+    if not isinstance(colors, list):
+        colors = [colors for _ in range(len(bboxes))]
+    colors = [mmcv.color_val(c) for c in colors]
+    assert len(bboxes) == len(colors)
+
+    img = mmcv.imshow_bboxes(
+        img,
+        bboxes,
+        colors,
+        top_k=-1,
+        thickness=thickness,
+        show=False,
+        out_file=None)
+
+    if labels is not None:
+        if not isinstance(labels, list):
+            labels = [labels for _ in range(len(bboxes))]
+        assert len(labels) == len(bboxes)
+
+        for bbox, label, color in zip(bboxes, labels, colors):
+            if label is None:
+                continue
+            bbox_int = bbox[0, :4].astype(np.int32)
+            # roughly estimate the proper font size
+            text_size, text_baseline = cv2.getTextSize(label,
+                                                       cv2.FONT_HERSHEY_DUPLEX,
+                                                       font_scale, thickness)
+            text_x1 = bbox_int[0]
+            text_y1 = max(0, bbox_int[1] - text_size[1] - text_baseline)
+            text_x2 = bbox_int[0] + text_size[0]
+            text_y2 = text_y1 + text_size[1] + text_baseline
+            cv2.rectangle(img, (text_x1, text_y1), (text_x2, text_y2), color,
+                          cv2.FILLED)
+            cv2.putText(img, label, (text_x1, text_y2 - text_baseline),
+                        cv2.FONT_HERSHEY_DUPLEX, font_scale,
+                        mmcv.color_val(text_color), thickness)
+
+    return img
+
+
+def imshow_keypoints(img,
+                     pose_result,
+                     skeleton=None,
+                     kpt_score_thr=0.3,
+                     pose_kpt_color=None,
+                     pose_link_color=None,
+                     radius=4,
+                     thickness=1,
+                     show_keypoint_weight=False):
+    """Draw keypoints and links on an image.
+
+    Args:
+            img (str or ndarray): The image to draw poses on. If an image array
+                is given, it will be modified in-place.
+            pose_result (list[kpts]): The poses to draw. Each element kpts is
+                a set of K keypoints as an Kx3 numpy.ndarray, where each
+                keypoint is represented as x, y, score.
+            kpt_score_thr (float, optional): Minimum score of keypoints
+                to be shown. Default: 0.3.
+            pose_kpt_color (np.array[Nx3]): Color of N keypoints. If None,
+                the keypoint will not be drawn.
+            pose_link_color (np.array[Mx3]): Color of M links. If None, the
+                links will not be drawn.
+            thickness (int): Thickness of lines.
+    """
+
+    img = mmcv.imread(img)
+    img_h, img_w, _ = img.shape
+
+    for kpts in pose_result:
+
+        kpts = np.array(kpts, copy=False)
+
+        # draw each point on image
+        if pose_kpt_color is not None:
+            assert len(pose_kpt_color) == len(kpts)
+
+            for kid, kpt in enumerate(kpts):
+                x_coord, y_coord, kpt_score = int(kpt[0]), int(kpt[1]), kpt[2]
+
+                if kpt_score < kpt_score_thr or pose_kpt_color[kid] is None:
+                    # skip the point that should not be drawn
+                    continue
+
+                color = tuple(int(c) for c in pose_kpt_color[kid])
+                if show_keypoint_weight:
+                    img_copy = img.copy()
+                    cv2.circle(img_copy, (int(x_coord), int(y_coord)), radius,
+                               color, -1)
+                    transparency = max(0, min(1, kpt_score))
+                    cv2.addWeighted(
+                        img_copy,
+                        transparency,
+                        img,
+                        1 - transparency,
+                        0,
+                        dst=img)
+                else:
+                    cv2.circle(img, (int(x_coord), int(y_coord)), radius,
+                               color, -1)
+
+        # draw links
+        if skeleton is not None and pose_link_color is not None:
+            assert len(pose_link_color) == len(skeleton)
+
+            for sk_id, sk in enumerate(skeleton):
+                pos1 = (int(kpts[sk[0], 0]), int(kpts[sk[0], 1]))
+                pos2 = (int(kpts[sk[1], 0]), int(kpts[sk[1], 1]))
+
+                if (pos1[0] <= 0 or pos1[0] >= img_w or pos1[1] <= 0
+                        or pos1[1] >= img_h or pos2[0] <= 0 or pos2[0] >= img_w
+                        or pos2[1] <= 0 or pos2[1] >= img_h
+                        or kpts[sk[0], 2] < kpt_score_thr
+                        or kpts[sk[1], 2] < kpt_score_thr
+                        or pose_link_color[sk_id] is None):
+                    # skip the link that should not be drawn
+                    continue
+                color = tuple(int(c) for c in pose_link_color[sk_id])
+                if show_keypoint_weight:
+                    img_copy = img.copy()
+                    X = (pos1[0], pos2[0])
+                    Y = (pos1[1], pos2[1])
+                    mX = np.mean(X)
+                    mY = np.mean(Y)
+                    length = ((Y[0] - Y[1])**2 + (X[0] - X[1])**2)**0.5
+                    angle = math.degrees(math.atan2(Y[0] - Y[1], X[0] - X[1]))
+                    stickwidth = 2
+                    polygon = cv2.ellipse2Poly(
+                        (int(mX), int(mY)), (int(length / 2), int(stickwidth)),
+                        int(angle), 0, 360, 1)
+                    cv2.fillConvexPoly(img_copy, polygon, color)
+                    transparency = max(
+                        0, min(1, 0.5 * (kpts[sk[0], 2] + kpts[sk[1], 2])))
+                    cv2.addWeighted(
+                        img_copy,
+                        transparency,
+                        img,
+                        1 - transparency,
+                        0,
+                        dst=img)
+                else:
+                    cv2.line(img, pos1, pos2, color, thickness=thickness)
+
+    return img
+
+
 @NODES.register_module()
 class ObjectVisualizerNode(BaseVisualizerNode):
     """Visualize the bounding box and keypoints of objects.
@@ -91,12 +264,11 @@ def __init__(self,
         self.show_bbox = show_bbox
         self.show_keypoint = show_keypoint
         self.must_have_keypoint = must_have_keypoint
+        self.radius = radius
+        self.thickness = thickness
 
-        self.visualizer = PoseLocalVisualizer(
-            name='webcam', radius=radius, line_width=thickness)
-
-    def draw(self, input_msg: FrameMessage) -> np.ndarray:
-        canvas = input_msg.get_image()
+    def _draw_bbox(self, canvas: np.ndarray, input_msg: FrameMessage):
+        """Draw object bboxes."""
 
         if self.must_have_keypoint:
             objects = input_msg.get_objects(
@@ -107,49 +279,63 @@ def draw(self, input_msg: FrameMessage) -> np.ndarray:
         if not objects:
             return canvas
 
-        objects_by_label = defaultdict(list)
-        for object in objects:
-            objects_by_label[object['label']].append(object)
-
-        # draw objects of each category individually
-        for label, objects in objects_by_label.items():
-            dataset_meta = objects[0]['dataset_meta']
-            dataset_meta['bbox_color'] = self.default_bbox_color.get(
-                label, self.bbox_color)
-            self.visualizer.set_dataset_meta(dataset_meta)
-
-            # assign bboxes, keypoints and other predictions to data_sample
-            instances = InstanceData()
-            instances.bboxes = np.stack([object['bbox'] for object in objects])
-            instances.labels = np.array(
-                [object['class_id'] for object in objects])
-            if self.show_keypoint:
-                keypoints = [
-                    object['keypoints'] for object in objects
-                    if 'keypoints' in object
-                ]
-                if len(keypoints):
-                    instances.keypoints = np.stack(keypoints)
-                keypoint_scores = [
-                    object['keypoint_scores'] for object in objects
-                    if 'keypoint_scores' in object
-                ]
-                if len(keypoint_scores):
-                    instances.keypoint_scores = np.stack(keypoint_scores)
-            data_sample = PoseDataSample()
-            data_sample.pred_instances = instances
-
-            self.visualizer.add_datasample(
-                'result',
+        bboxes = [obj['bbox'] for obj in objects]
+        labels = [obj.get('label', None) for obj in objects]
+        default_color = (0, 255, 0)
+
+        # Get bbox colors
+        if isinstance(self.bbox_color, dict):
+            colors = [
+                self.bbox_color.get(label, default_color) for label in labels
+            ]
+        else:
+            colors = self.bbox_color
+
+        imshow_bboxes(
+            canvas,
+            np.vstack(bboxes),
+            labels=labels,
+            colors=colors,
+            text_color='white',
+            font_scale=0.5)
+
+        return canvas
+
+    def _draw_keypoint(self, canvas: np.ndarray, input_msg: FrameMessage):
+        """Draw object keypoints."""
+        objects = input_msg.get_objects(lambda x: 'pose_model_cfg' in x)
+
+        # return if there is no object with keypoints
+        if not objects:
+            return canvas
+
+        for model_cfg, group in groupby(objects,
+                                        lambda x: x['pose_model_cfg']):
+            dataset_info = objects[0]['dataset_meta']
+            keypoints = [
+                np.concatenate(
+                    (obj['keypoints'], obj['keypoint_scores'][:, None]),
+                    axis=1) for obj in group
+            ]
+            imshow_keypoints(
                 canvas,
-                data_sample=data_sample,
-                draw_gt=False,
-                draw_heatmap=False,
-                draw_bbox=True,
-                show=False,
-                wait_time=0,
-                out_file=None,
-                kpt_score_thr=self.kpt_thr)
-            canvas = self.visualizer.get_image()
+                keypoints,
+                skeleton=dataset_info['skeleton_links'],
+                kpt_score_thr=self.kpt_thr,
+                pose_kpt_color=dataset_info['keypoint_colors'],
+                pose_link_color=dataset_info['skeleton_link_colors'],
+                radius=self.radius,
+                thickness=self.thickness)
+
+        return canvas
+
+    def draw(self, input_msg: FrameMessage) -> np.ndarray:
+        canvas = input_msg.get_image()
+
+        if self.show_bbox:
+            canvas = self._draw_bbox(canvas, input_msg)
+
+        if self.show_keypoint:
+            canvas = self._draw_keypoint(canvas, input_msg)
 
         return canvas