From 71a36345ecdc294a30e818b36d52723216498781 Mon Sep 17 00:00:00 2001 From: lupeng Date: Sun, 12 Feb 2023 14:09:59 +0800 Subject: [PATCH 01/17] add alias in metafiles --- .../topdown_heatmap/ap10k/resnet_ap10k.yml | 41 +++++++++++++++++++ .../simcc/coco/resnet_coco.yml | 1 + .../topdown_heatmap/wflw/hrnetv2_wflw.yml | 27 ++++++++++++ .../onehand10k/resnet_onehand10k.yml | 24 +++++++++++ model-index.yml | 3 ++ 5 files changed, 96 insertions(+) create mode 100644 configs/animal_2d_keypoint/topdown_heatmap/ap10k/resnet_ap10k.yml create mode 100644 configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_wflw.yml create mode 100644 configs/hand_2d_keypoint/topdown_heatmap/onehand10k/resnet_onehand10k.yml diff --git a/configs/animal_2d_keypoint/topdown_heatmap/ap10k/resnet_ap10k.yml b/configs/animal_2d_keypoint/topdown_heatmap/ap10k/resnet_ap10k.yml new file mode 100644 index 0000000000..11c6d912ac --- /dev/null +++ b/configs/animal_2d_keypoint/topdown_heatmap/ap10k/resnet_ap10k.yml @@ -0,0 +1,41 @@ +Collections: +- Name: SimpleBaseline2D + Paper: + Title: Simple baselines for human pose estimation and tracking + URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html + README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md +Models: +- Config: configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_res50_8xb64-210e_ap10k-256x256.py + In Collection: SimpleBaseline2D + Alias: animal + Metadata: + Architecture: &id001 + - SimpleBaseline2D + Training Data: AP-10K + Name: topdown_heatmap_res50_ap10k_256x256 + Results: + - Dataset: AP-10K + Metrics: + AP: 0.680 + AP@0.5: 0.926 + AP@0.75: 0.738 + APL: 0.687 + APM: 0.552 + Task: Animal 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/animal/resnet/res50_ap10k_256x256-35760eb8_20211029.pth +- Config: configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_res101_8xb64-210e_ap10k-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: AP-10K + Name: topdown_heatmap_res101_ap10k_256x256 + Results: + - Dataset: AP-10K + Metrics: + AP: 0.681 + AP@0.5: 0.921 + AP@0.75: 0.751 + APL: 0.690 + APM: 0.545 + Task: Animal 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/animal/resnet/res101_ap10k_256x256-9edfafb9_20211029.pth diff --git a/configs/body_2d_keypoint/simcc/coco/resnet_coco.yml b/configs/body_2d_keypoint/simcc/coco/resnet_coco.yml index 3ad0caddca..f921ef4ac9 100644 --- a/configs/body_2d_keypoint/simcc/coco/resnet_coco.yml +++ b/configs/body_2d_keypoint/simcc/coco/resnet_coco.yml @@ -7,6 +7,7 @@ Collections: Models: - Config: configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-210e_coco-256x192.py In Collection: SimCC + Alias: human Metadata: Architecture: &id001 - SimCC diff --git a/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_wflw.yml b/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_wflw.yml new file mode 100644 index 0000000000..3a0a4a454a --- /dev/null +++ b/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_wflw.yml @@ -0,0 +1,27 @@ +Collections: +- Name: HRNetv2 + Paper: + Title: Deep High-Resolution Representation Learning for Visual Recognition + URL: https://arxiv.org/abs/1908.07919 + README: https://github.com/open-mmlab/mmpose/blob/1.x/docs/src/papers/backbones/hrnetv2.md +Models: +- Config: configs/face_2d_keypoint/topdown_heatmap/wflw/td-hm_hrnetv2-w18_8xb64-60e_wflw-256x256.py + In Collection: HRNetv2 + Alias: face + Metadata: 
+ Architecture: + - HRNetv2 + Training Data: WFLW + Name: topdown_heatmap_hrnetv2_w18_wflw_256x256 + Results: + - Dataset: WFLW + Metrics: + NME blur: 4.58 + NME expression: 4.33 + NME illumination: 3.99 + NME makeup: 3.94 + NME occlusion: 4.83 + NME pose: 6.97 + NME test: 4.06 + Task: Face 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_wflw_256x256-2bf032a6_20210125.pth diff --git a/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/resnet_onehand10k.yml b/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/resnet_onehand10k.yml new file mode 100644 index 0000000000..828427899c --- /dev/null +++ b/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/resnet_onehand10k.yml @@ -0,0 +1,24 @@ +Collections: +- Name: SimpleBaseline2D + Paper: + Title: Simple baselines for human pose estimation and tracking + URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html + README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md +Models: +- Config: configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_res50_8xb32-210e_onehand10k-256x256.py + In Collection: SimpleBaseline2D + Alias: hand + Metadata: + Architecture: + - SimpleBaseline2D + - ResNet + Training Data: OneHand10K + Name: topdown_heatmap_res50_onehand10k_256x256 + Results: + - Dataset: OneHand10K + Metrics: + AUC: 0.555 + EPE: 25.16 + PCK@0.2: 0.989 + Task: Hand 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/hand/resnet/res50_onehand10k_256x256-739c8639_20210330.pth diff --git a/model-index.yml b/model-index.yml index 12690e94ef..d961b808aa 100644 --- a/model-index.yml +++ b/model-index.yml @@ -6,3 +6,6 @@ Import: - configs/body_2d_keypoint/simcc/coco/resnet_coco.yml - configs/body_2d_keypoint/simcc/coco/mobilenetv2_coco.yml - configs/body_2d_keypoint/simcc/coco/vipnas_coco.yml +- configs/animal_2d_keypoint/topdown_heatmap/ap10k/resnet_ap10k.yml +- configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_wflw.yml +- configs/hand_2d_keypoint/topdown_heatmap/onehand10k/resnet_onehand10k.yml From 5954434009151e6aeafaca6ba7fd2f72972d3668 Mon Sep 17 00:00:00 2001 From: lupeng Date: Mon, 13 Feb 2023 20:21:49 +0800 Subject: [PATCH 02/17] add Pose2DInferencer --- .../simcc/coco/resnet_coco.yml | 1 - .../topdown_heatmap/coco/hrnet_coco.yml | 1 + .../yolox-s_8xb8-300e_coco-face.py | 306 +++++++++++ mmpose/apis/__init__.py | 5 +- mmpose/apis/inferencers/__init__.py | 4 + mmpose/apis/inferencers/mmpose_inferencer.py | 1 + mmpose/apis/inferencers/pose2d_inferencer.py | 497 ++++++++++++++++++ mmpose/apis/inferencers/utils/__init__.py | 4 + .../inferencers/utils/default_det_models.py | 21 + mmpose/registry.py | 7 + mmpose/visualization/local_visualizer.py | 2 + .../test_pose2d_inferencer.py | 112 ++++ 12 files changed, 959 insertions(+), 2 deletions(-) create mode 100644 demo/mmdetection_cfg/yolox-s_8xb8-300e_coco-face.py create mode 100644 mmpose/apis/inferencers/__init__.py create mode 100644 mmpose/apis/inferencers/mmpose_inferencer.py create mode 100644 mmpose/apis/inferencers/pose2d_inferencer.py create mode 100644 mmpose/apis/inferencers/utils/__init__.py create mode 100644 mmpose/apis/inferencers/utils/default_det_models.py create mode 100644 tests/test_apis/test_inferencers/test_pose2d_inferencer.py diff --git a/configs/body_2d_keypoint/simcc/coco/resnet_coco.yml b/configs/body_2d_keypoint/simcc/coco/resnet_coco.yml index f921ef4ac9..3ad0caddca 100644 --- 
a/configs/body_2d_keypoint/simcc/coco/resnet_coco.yml +++ b/configs/body_2d_keypoint/simcc/coco/resnet_coco.yml @@ -7,7 +7,6 @@ Collections: Models: - Config: configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-210e_coco-256x192.py In Collection: SimCC - Alias: human Metadata: Architecture: &id001 - SimCC diff --git a/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_coco.yml b/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_coco.yml index 0131493c15..86a305d223 100644 --- a/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_coco.yml +++ b/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_coco.yml @@ -7,6 +7,7 @@ Collections: Models: - Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-256x192.py In Collection: HRNet + Alias: human Metadata: Architecture: &id001 - HRNet diff --git a/demo/mmdetection_cfg/yolox-s_8xb8-300e_coco-face.py b/demo/mmdetection_cfg/yolox-s_8xb8-300e_coco-face.py new file mode 100644 index 0000000000..9180b831e6 --- /dev/null +++ b/demo/mmdetection_cfg/yolox-s_8xb8-300e_coco-face.py @@ -0,0 +1,306 @@ +train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=300, val_interval=10) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +param_scheduler = [ + dict( + type='mmdet.QuadraticWarmupLR', + by_epoch=True, + begin=0, + end=5, + convert_to_iter_based=True), + dict( + type='CosineAnnealingLR', + eta_min=0.0005, + begin=5, + T_max=285, + end=285, + by_epoch=True, + convert_to_iter_based=True), + dict(type='ConstantLR', by_epoch=True, factor=1, begin=285, end=300) +] +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict( + type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005, nesterov=True), + paramwise_cfg=dict(norm_decay_mult=0.0, bias_decay_mult=0.0)) +auto_scale_lr = dict(enable=False, base_batch_size=64) +default_scope = 'mmdet' +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict(type='CheckpointHook', interval=10, max_keep_ckpts=3), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='DetVisualizationHook')) +env_cfg = dict( + cudnn_benchmark=False, + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), + dist_cfg=dict(backend='nccl')) +vis_backends = [dict(type='LocalVisBackend')] +visualizer = dict( + type='DetLocalVisualizer', + vis_backends=[dict(type='LocalVisBackend')], + name='visualizer') +log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True) +log_level = 'INFO' +load_from = 'https://download.openmmlab.com/mmdetection/' \ + 'v2.0/yolox/yolox_s_8x8_300e_coco/' \ + 'yolox_s_8x8_300e_coco_20211121_095711-4592a793.pth' +resume = False +img_scale = (640, 640) +model = dict( + type='YOLOX', + data_preprocessor=dict( + type='DetDataPreprocessor', + pad_size_divisor=32, + batch_augments=[ + dict( + type='BatchSyncRandomResize', + random_size_range=(480, 800), + size_divisor=32, + interval=10) + ]), + backbone=dict( + type='CSPDarknet', + deepen_factor=0.33, + widen_factor=0.5, + out_indices=(2, 3, 4), + use_depthwise=False, + spp_kernal_sizes=(5, 9, 13), + norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), + act_cfg=dict(type='Swish')), + neck=dict( + type='YOLOXPAFPN', + in_channels=[128, 256, 512], + out_channels=128, + num_csp_blocks=1, + use_depthwise=False, + upsample_cfg=dict(scale_factor=2, mode='nearest'), + norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), + act_cfg=dict(type='Swish')), + bbox_head=dict( + 
type='YOLOXHead', + num_classes=1, + in_channels=128, + feat_channels=128, + stacked_convs=2, + strides=(8, 16, 32), + use_depthwise=False, + norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), + act_cfg=dict(type='Swish'), + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=True, + reduction='sum', + loss_weight=1.0), + loss_bbox=dict( + type='IoULoss', + mode='square', + eps=1e-16, + reduction='sum', + loss_weight=5.0), + loss_obj=dict( + type='CrossEntropyLoss', + use_sigmoid=True, + reduction='sum', + loss_weight=1.0), + loss_l1=dict(type='L1Loss', reduction='sum', loss_weight=1.0)), + train_cfg=dict(assigner=dict(type='SimOTAAssigner', center_radius=2.5)), + test_cfg=dict(score_thr=0.01, nms=dict(type='nms', iou_threshold=0.65))) +data_root = 'data/coco/' +dataset_type = 'CocoDataset' +file_client_args = dict(backend='disk') +train_pipeline = [ + dict(type='Mosaic', img_scale=(640, 640), pad_val=114.0), + dict( + type='RandomAffine', scaling_ratio_range=(0.1, 2), + border=(-320, -320)), + dict( + type='MixUp', + img_scale=(640, 640), + ratio_range=(0.8, 1.6), + pad_val=114.0), + dict(type='YOLOXHSVRandomAug'), + dict(type='RandomFlip', prob=0.5), + dict(type='Resize', scale=(640, 640), keep_ratio=True), + dict( + type='Pad', + pad_to_square=True, + pad_val=dict(img=(114.0, 114.0, 114.0))), + dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1), keep_empty=False), + dict(type='PackDetInputs') +] +train_dataset = dict( + type='MultiImageMixDataset', + dataset=dict( + type='CocoDataset', + data_root='data/coco/', + ann_file='annotations/instances_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=[ + dict( + type='LoadImageFromFile', + file_client_args=dict(backend='disk')), + dict(type='LoadAnnotations', with_bbox=True) + ], + filter_cfg=dict(filter_empty_gt=False, min_size=32)), + pipeline=[ + dict(type='Mosaic', img_scale=(640, 640), pad_val=114.0), + dict( + type='RandomAffine', + scaling_ratio_range=(0.1, 2), + border=(-320, -320)), + dict( + type='MixUp', + img_scale=(640, 640), + ratio_range=(0.8, 1.6), + pad_val=114.0), + dict(type='YOLOXHSVRandomAug'), + dict(type='RandomFlip', prob=0.5), + dict(type='Resize', scale=(640, 640), keep_ratio=True), + dict( + type='Pad', + pad_to_square=True, + pad_val=dict(img=(114.0, 114.0, 114.0))), + dict( + type='FilterAnnotations', min_gt_bbox_wh=(1, 1), keep_empty=False), + dict(type='PackDetInputs') + ]) +test_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), + dict(type='Resize', scale=(640, 640), keep_ratio=True), + dict( + type='Pad', + pad_to_square=True, + pad_val=dict(img=(114.0, 114.0, 114.0))), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] +train_dataloader = dict( + batch_size=8, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type='MultiImageMixDataset', + dataset=dict( + type='CocoDataset', + data_root='data/coco/', + ann_file='annotations/coco_face_train.json', + data_prefix=dict(img='train2017/'), + pipeline=[ + dict( + type='LoadImageFromFile', + file_client_args=dict(backend='disk')), + dict(type='LoadAnnotations', with_bbox=True) + ], + filter_cfg=dict(filter_empty_gt=False, min_size=32), + metainfo=dict(CLASSES=('person', ), PALETTE=(220, 20, 60))), + pipeline=[ + dict(type='Mosaic', img_scale=(640, 640), pad_val=114.0), + dict( + type='RandomAffine', + scaling_ratio_range=(0.1, 2), + 
border=(-320, -320)), + dict( + type='MixUp', + img_scale=(640, 640), + ratio_range=(0.8, 1.6), + pad_val=114.0), + dict(type='YOLOXHSVRandomAug'), + dict(type='RandomFlip', prob=0.5), + dict(type='Resize', scale=(640, 640), keep_ratio=True), + dict( + type='Pad', + pad_to_square=True, + pad_val=dict(img=(114.0, 114.0, 114.0))), + dict( + type='FilterAnnotations', + min_gt_bbox_wh=(1, 1), + keep_empty=False), + dict(type='PackDetInputs') + ])) +val_dataloader = dict( + batch_size=8, + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type='CocoDataset', + data_root='data/coco/', + ann_file='annotations/coco_face_val.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=[ + dict( + type='LoadImageFromFile', + file_client_args=dict(backend='disk')), + dict(type='Resize', scale=(640, 640), keep_ratio=True), + dict( + type='Pad', + pad_to_square=True, + pad_val=dict(img=(114.0, 114.0, 114.0))), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) + ], + metainfo=dict(CLASSES=('person', ), PALETTE=(220, 20, 60)))) +test_dataloader = dict( + batch_size=8, + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type='CocoDataset', + data_root='data/coco/', + ann_file='annotations/coco_face_val.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=[ + dict( + type='LoadImageFromFile', + file_client_args=dict(backend='disk')), + dict(type='Resize', scale=(640, 640), keep_ratio=True), + dict( + type='Pad', + pad_to_square=True, + pad_val=dict(img=(114.0, 114.0, 114.0))), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) + ], + metainfo=dict(CLASSES=('person', ), PALETTE=(220, 20, 60)))) +val_evaluator = dict( + type='CocoMetric', + ann_file='data/coco/annotations/coco_face_val.json', + metric='bbox') +test_evaluator = dict( + type='CocoMetric', + ann_file='data/coco/annotations/instances_val2017.json', + metric='bbox') +max_epochs = 300 +num_last_epochs = 15 +interval = 10 +base_lr = 0.01 +custom_hooks = [ + dict(type='YOLOXModeSwitchHook', num_last_epochs=15, priority=48), + dict(type='SyncNormHook', priority=48), + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0001, + strict_load=False, + update_buffers=True, + priority=49) +] +metainfo = dict(CLASSES=('person', ), PALETTE=(220, 20, 60)) +launcher = 'pytorch' diff --git a/mmpose/apis/__init__.py b/mmpose/apis/__init__.py index 8534c95e61..8ac662fa17 100644 --- a/mmpose/apis/__init__.py +++ b/mmpose/apis/__init__.py @@ -1,4 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. from .inference import inference_bottomup, inference_topdown, init_model +from .inferencers import Pose2DInferencer -__all__ = ['init_model', 'inference_topdown', 'inference_bottomup'] +__all__ = [ + 'init_model', 'inference_topdown', 'inference_bottomup', 'Pose2DInferencer' +] diff --git a/mmpose/apis/inferencers/__init__.py b/mmpose/apis/inferencers/__init__.py new file mode 100644 index 0000000000..776c4d4952 --- /dev/null +++ b/mmpose/apis/inferencers/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from .pose2d_inferencer import Pose2DInferencer + +__all__ = ['Pose2DInferencer'] diff --git a/mmpose/apis/inferencers/mmpose_inferencer.py b/mmpose/apis/inferencers/mmpose_inferencer.py new file mode 100644 index 0000000000..ef101fec61 --- /dev/null +++ b/mmpose/apis/inferencers/mmpose_inferencer.py @@ -0,0 +1 @@ +# Copyright (c) OpenMMLab. All rights reserved. diff --git a/mmpose/apis/inferencers/pose2d_inferencer.py b/mmpose/apis/inferencers/pose2d_inferencer.py new file mode 100644 index 0000000000..1b16fcf391 --- /dev/null +++ b/mmpose/apis/inferencers/pose2d_inferencer.py @@ -0,0 +1,497 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import mimetypes +import os +import tempfile +import warnings +from collections import defaultdict +from typing import Callable, Dict, List, Optional, Sequence, Union + +import mmcv +import mmengine +import numpy as np +import torch.nn as nn +from mmdet.apis.det_inferencer import DetInferencer +from mmengine.config import Config, ConfigDict +from mmengine.dataset import Compose +from mmengine.fileio import (get_file_backend, isdir, join_path, + list_dir_or_file) +from mmengine.infer.infer import BaseInferencer, ModelType +from mmengine.registry import init_default_scope +from mmengine.runner.checkpoint import _load_checkpoint_to_model +from mmengine.structures import InstanceData +from rich.progress import track + +from mmpose.apis.inference import dataset_meta_from_config +from mmpose.evaluation.functional import nms +from mmpose.registry import DATASETS, INFERENCERS +from mmpose.structures import (PoseDataSample, merge_data_samples, + split_instances) +from .utils import default_det_models + +InstanceList = List[InstanceData] +InputType = Union[str, np.ndarray] +InputsType = Union[InputType, Sequence[InputType]] +PredType = Union[InstanceData, InstanceList] +ImgType = Union[np.ndarray, Sequence[np.ndarray]] +ConfigType = Union[Config, ConfigDict] +ResType = Union[Dict, List[Dict], InstanceData, List[InstanceData]] + + +@INFERENCERS.register_module(name='pose-estimation') +@INFERENCERS.register_module() +class Pose2DInferencer(BaseInferencer): + """The inferencer for 2D pose estimation. + + Args: + model (str, optional): Path to the config file or the model name + defined in metafile. For example, it could be + "td-hm_hrnet-w32_8xb64-210e_coco-256x192" or + "configs/body_2d_keypoint/topdown_heatmap/coco/" \\ + "td-hm_hrnet-w32_8xb64-210e_coco-256x192.py" + weights (str, optional): Path to the checkpoint. If it is not + specified and model is a model name of metafile, the weights + will be loaded from metafile. Defaults to None. + device (str, optional): Device to run inference. If None, the + available device will be automatically used. Defaults to None. + scope (str, optional): The scope of the model. Defaults to "mmpose". + instance_type (str, optional): The name of the instances, such as + "human", "hand", "animal", and etc. This argument works as the + alias for the detection models, which will be utilized in + topdown methods. Defaults to None. + det_model(str, optional): Path to the config of detection model. + Defaults to None. + det_weights(str, optional): Path to the checkpoints of detection + model. Defaults to None. + det_cat_ids(int or list[int], optional): Category id for + detection model. Defaults to None. 
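+
+    Example (illustrative sketch; the model name and image path below are the
+    ones already used elsewhere in this patch series):
+        >>> from mmpose.apis import Pose2DInferencer
+        >>> inferencer = Pose2DInferencer(
+        ...     model='td-hm_hrnet-w32_8xb64-210e_coco-256x192')
+        >>> results = inferencer(
+        ...     'tests/data/coco/000000197388.jpg', return_vis=True)
+        >>> results['predictions'][0]  # list of per-instance keypoint results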
+ """ + + preprocess_kwargs: set = {'bbox_thr', 'nms_thr'} + forward_kwargs: set = set() + visualize_kwargs: set = { + 'return_vis', + 'show', + 'wait_time', + 'radius', + 'thickness', + 'kpt_thr', + 'vis_out_dir', + } + postprocess_kwargs: set = { + 'pred_out_dir', + 'return_datasample', + } + + def __init__(self, + model: Union[ModelType, str], + weights: Optional[str] = None, + device: Optional[str] = None, + scope: Optional[str] = 'mmpose', + instance_type: Optional[str] = None, + det_model: Optional[Union[ModelType, str]] = None, + det_weights: Optional[str] = None, + det_cat_ids: Optional[Union[int, List]] = None) -> None: + + init_default_scope(scope) + super().__init__( + model=model, weights=weights, device=device, scope=scope) + + # assign dataset metainfo to self.visualizer + self.visualizer.set_dataset_meta(self.model.dataset_meta) + + # initialize detector for top-down models + if self.cfg.data_mode == 'topdown': + if det_model is None: + if instance_type is None: + instance_type = DATASETS.get( + self.cfg.dataset_type).__module__.split( + 'datasets.')[-1].split('.')[0] + if instance_type not in default_det_models: + raise ValueError( + f'detector for {instance_type} has not been ' + 'provided. You need to specify `det_model`, ' + '`det_weights` and `det_cat_ids` manually') + det_info = default_det_models[instance_type.lower()] + det_model, det_weights, det_cat_ids = det_info[ + 'model'], det_info['weights'], det_info['cat_ids'] + + self.detector = DetInferencer( + det_model, det_weights, device=device) + self.det_cat_ids = det_cat_ids + + self.video_input = False + + def _load_weights_to_model(self, model: nn.Module, + checkpoint: Optional[dict], + cfg: Optional[ConfigType]) -> None: + """Loading model weights and meta information from cfg and checkpoint. + + Subclasses could override this method to load extra meta information + from ``checkpoint`` and ``cfg`` to model. + + Args: + model (nn.Module): Model to load weights and meta information. + checkpoint (dict, optional): The loaded checkpoint. + cfg (Config or ConfigDict, optional): The loaded config. + """ + if checkpoint is not None: + _load_checkpoint_to_model(model, checkpoint) + checkpoint_meta = checkpoint.get('meta', {}) + # save the dataset_meta in the model for convenience + if 'dataset_meta' in checkpoint_meta: + # mmpose 1.x + model.dataset_meta = checkpoint_meta['dataset_meta'] + else: + warnings.warn( + 'dataset_meta are not saved in the checkpoint\'s ' + 'meta data, load via config.') + model.dataset_meta = dataset_meta_from_config( + cfg, dataset_mode='train') + else: + warnings.warn('Checkpoint is not loaded, and the inference ' + 'result is calculated by the randomly initialized ' + 'model!') + model.dataset_meta = dataset_meta_from_config( + cfg, dataset_mode='train') + + def _inputs_to_list(self, inputs: InputsType) -> list: + """Preprocess the inputs to a list. + + Preprocess inputs to a list according to its type: + + - list or tuple: return inputs + - str: + - Directory path: return all files in the directory + - other cases: return a list containing the string. The string + could be a path to file, a url or other types of string + according to the task. + + Args: + inputs (InputsType): Inputs for the inferencer. + + Returns: + list: List of input for the :meth:`preprocess`. 
+ """ + if isinstance(inputs, str): + backend = get_file_backend(inputs) + if hasattr(backend, 'isdir') and isdir(inputs): + # Backends like HttpsBackend do not implement `isdir`, so only + # those backends that implement `isdir` could accept the + # inputs as a directory + filepath_list = [ + join_path(inputs, fname) + for fname in list_dir_or_file(inputs, list_dir=False) + ] + inputs = [] + for filepath in filepath_list: + input_type = mimetypes.guess_type(filepath)[0].split( + '/')[0] + if input_type == 'image': + inputs.append(filepath) + inputs = inputs + else: + # if inputs is a path to a video file, it will be converted + # to a list containing separated frame filenames + input_type = mimetypes.guess_type(inputs)[0].split('/')[0] + if input_type == 'video': + self.video_input = True + # split video frames into a temperory folder + tmp_folder = tempfile.TemporaryDirectory() + video = mmcv.VideoReader(inputs) + self.video_info = dict( + fps=video.fps, + name=os.path.basename(inputs), + tmp_folder=tmp_folder) + video.cvt2frames(tmp_folder.name, show_progress=False) + frames = sorted(os.listdir(tmp_folder.name)) + inputs = [os.path.join(tmp_folder.name, f) for f in frames] + + if not isinstance(inputs, (list, tuple)): + inputs = [inputs] + + return list(inputs) + + def _init_pipeline(self, cfg: ConfigType) -> Callable: + """Initialize the test pipeline. + + Args: + cfg (ConfigType): model config path or dict + + Returns: + A pipeline to handle various input data, such as ``str``, + ``np.ndarray``. The returned pipeline will be used to process + a single data. + """ + return Compose(cfg.test_dataloader.dataset.pipeline) + + def preprocess(self, + inputs: InputsType, + batch_size: int = 1, + bbox_thr: float = 0.3, + nms_thr: float = 0.3): + """Process the inputs into a model-feedable format. + + 1. for topdown models, detection is conducted in this function + 2. inputs are converted into the format same as the data_info + provided by dataset and then fed to pipeline + 3. 
batch_size is ineffective here + """ + + for i, input in enumerate(inputs): + + if isinstance(input, str): + data_info = dict(img_path=input) + else: + data_info = dict(img=input, img_path=f'{i+1}.jpg'.rjust(10)) + data_info.update(self.model.dataset_meta) + + if self.cfg.data_mode == 'topdown': + det_results = self.detector( + input, return_datasample=True)['predictions'] + pred_instance = det_results[0].pred_instances.cpu().numpy() + bboxes = np.concatenate( + (pred_instance.bboxes, pred_instance.scores[:, None]), + axis=1) + + label_mask = np.zeros(len(bboxes), dtype=np.uint8) + for cat_id in self.det_cat_ids: + label_mask = np.logical_or(label_mask, + pred_instance.labels == cat_id) + + bboxes = bboxes[np.logical_and( + label_mask, pred_instance.scores > bbox_thr)] + bboxes = bboxes[nms(bboxes, nms_thr)] + + data_infos = [] + if len(bboxes) > 0: + for bbox in bboxes: + inst = data_info.copy() + inst['bbox'] = bbox[None, :4] + inst['bbox_score'] = bbox[4:5] + data_infos.append(self.pipeline(inst)) + else: + inst = data_info.copy() + + # get bbox from the image size + if isinstance(input, str): + input = mmcv.imread(input) + h, w = input.shape[:2] + + inst['bbox'] = np.array([[0, 0, w, h]], dtype=np.float32) + inst['bbox_score'] = np.ones(1, dtype=np.float32) + data_infos.append(self.pipeline(inst)) + + else: # bottom-up + data_infos = [self.pipeline(data_info)] + + yield self.collate_fn(data_infos) + + def forward(self, inputs: Union[dict, tuple]): + data_samples = super().forward(inputs) + if self.cfg.data_mode == 'topdown': + data_samples = [merge_data_samples(data_samples)] + return data_samples + + def __call__( + self, + inputs: InputsType, + return_datasamples: bool = False, + batch_size: int = 1, + out_dir: Optional[str] = None, + **kwargs, + ) -> dict: + """Call the inferencer. + + Args: + inputs (InputsType): Inputs for the inferencer. + return_datasamples (bool): Whether to return results as + :obj:`BaseDataElement`. Defaults to False. + batch_size (int): Batch size. Defaults to 1. + vis_out_dir (str, optional): directory to save visualization + results and predictions. Will be overoden if vis_out_dir or + pred_out_dir are given. Defaults to None + **kwargs: Key words arguments passed to :meth:`preprocess`, + :meth:`forward`, :meth:`visualize` and :meth:`postprocess`. + Each key in kwargs should be in the corresponding set of + ``preprocess_kwargs``, ``forward_kwargs``, + ``visualize_kwargs`` and ``postprocess_kwargs``. + + Returns: + dict: Inference and visualization results. 
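+
+        Example (illustrative; ``bbox_thr`` and ``kpt_thr`` are dispatched to
+        :meth:`preprocess` and :meth:`visualize` respectively, and the output
+        directory name is only a placeholder):
+            >>> inferencer(
+            ...     'tests/data/coco/000000197388.jpg',
+            ...     bbox_thr=0.3, kpt_thr=0.3, vis_out_dir='vis_results')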
+ """ + if out_dir is not None: + if 'vis_out_dir' not in kwargs: + kwargs['vis_out_dir'] = out_dir + if 'pred_out_dir' not in kwargs: + kwargs['pred_out_dir'] = out_dir + + ( + preprocess_kwargs, + forward_kwargs, + visualize_kwargs, + postprocess_kwargs, + ) = self._dispatch_kwargs(**kwargs) + + ori_inputs = self._inputs_to_list(inputs) + inputs = self.preprocess( + ori_inputs, batch_size=batch_size, **preprocess_kwargs) + preds = [] + if not hasattr(self, 'detector'): + inputs = track(inputs, description='Inference') + for data in inputs: + preds.extend(self.forward(data, **forward_kwargs)) + visualization = self.visualize( + ori_inputs, preds, + **visualize_kwargs) # type: ignore # noqa: E501 + results = self.postprocess(preds, visualization, return_datasamples, + **postprocess_kwargs) + return results + + def visualize(self, + inputs: list, + preds: List[PoseDataSample], + return_vis: bool = False, + show: bool = False, + wait_time: int = 0, + radius: int = 3, + thickness: int = 1, + kpt_thr: float = 0.3, + vis_out_dir: Optional[str] = None) -> List[np.ndarray]: + """Visualize predictions. + + Args: + inputs (list): Inputs preprocessed by :meth:`_inputs_to_list`. + preds (Any): Predictions of the model. + return_vis (bool): Whether to return images with predicted results. + show (bool): Whether to display the image in a popup window. + Defaults to False. + show_interval (int): The interval of show (s). Defaults to 0 + radius (int): Keypoint radius for visualization. Defaults to 3 + thickness (int): Link thickness for visualization. Defaults to 1 + kpt_thr (float): The threshold to visualize the keypoints. + Defaults to 0.3 + vis_out_dir (str, optional): directory to save visualization + results. Defaults to None. + + Returns: + List[np.ndarray]: Visualization results. + """ + if (not return_vis) and (not show) and (vis_out_dir is None): + return + + if getattr(self, 'visualizer', None) is None: + raise ValueError('Visualization needs the "visualizer" term' + 'defined in the config, but got None.') + + self.visualizer.radius = radius + self.visualizer.line_width = thickness + + results = [] + + for single_input, pred in zip(inputs, preds): + if isinstance(single_input, str): + img = mmcv.imread(single_input, channel_order='rgb') + elif isinstance(single_input, np.ndarray): + img = single_input.copy() + else: + raise ValueError('Unsupported input type: ' + f'{type(single_input)}') + + img_name = os.path.basename(pred.metainfo['img_path']) + + if self.video_input: + out_file = os.path.join(self.video_info['tmp_folder'].name, + img_name) + elif vis_out_dir: + out_file = os.path.join(vis_out_dir, img_name) + else: + out_file = None + + visualization = self.visualizer.add_datasample( + img_name, + img, + pred, + draw_gt=False, + show=show, + wait_time=wait_time, + out_file=out_file, + kpt_score_thr=kpt_thr) + results.append(visualization) + + if self.video_input and vis_out_dir: + # merge saved frame images into video + mmcv.frames2video( + self.video_info['tmp_folder'].name, + f'{vis_out_dir}/{self.video_info["name"]}', + fps=self.video_info['fps'], + fourcc='mp4v', + show_progress=False) + + return results + + def postprocess( + self, + preds: List[PoseDataSample], + visualization: List[np.ndarray], + return_datasample=False, + pred_out_dir: str = '', + ) -> dict: + """Process the predictions and visualization results from ``forward`` + and ``visualize``. + + This method should be responsible for the following tasks: + + 1. Convert datasamples into a json-serializable dict if needed. + 2. 
Pack the predictions and visualization results and return them. + 3. Dump or log the predictions. + + Args: + preds (List[Dict]): Predictions of the model. + visualization (np.ndarray): Visualized predictions. + return_datasample (bool): Whether to return results as + datasamples. Defaults to False. + pred_out_dir (str): Directory to save the inference results w/o + visualization. If left as empty, no file will be saved. + Defaults to ''. + + Returns: + dict: Inference and visualization results with key ``predictions`` + and ``visualization`` + + - ``visualization (Any)``: Returned by :meth:`visualize` + - ``predictions`` (dict or DataSample): Returned by + :meth:`forward` and processed in :meth:`postprocess`. + If ``return_datasample=False``, it usually should be a + json-serializable dict containing only basic data elements such + as strings and numbers. + """ + + result_dict = defaultdict(list) + + result_dict['visualization'] = visualization + for pred in preds: + if not return_datasample: + # convert datasamples to list of instance predictions + pred = split_instances(pred.pred_instances) + result_dict['predictions'].append(pred) + + if self.video_input: + result_dict['predictions'] = [{ + 'frame_id': i, + 'instances': pred + } for i, pred in enumerate(result_dict['predictions'])] + + if pred_out_dir != '': + if self.video_input: + fname = os.path.splitext(self.video_info['name'])[0] + '.json' + mmengine.dump(result_dict['predictions'], + join_path(pred_out_dir, fname)) + else: + for pred, data_sample in zip(result_dict['predictions'], + preds): + fname = os.path.splitext( + os.path.basename( + data_sample.metainfo['img_path']))[0] + '.json' + mmengine.dump(pred, join_path(pred_out_dir, fname)) + + return result_dict diff --git a/mmpose/apis/inferencers/utils/__init__.py b/mmpose/apis/inferencers/utils/__init__.py new file mode 100644 index 0000000000..e43e7b6734 --- /dev/null +++ b/mmpose/apis/inferencers/utils/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .default_det_models import default_det_models + +__all__ = ['default_det_models'] diff --git a/mmpose/apis/inferencers/utils/default_det_models.py b/mmpose/apis/inferencers/utils/default_det_models.py new file mode 100644 index 0000000000..818c4c4260 --- /dev/null +++ b/mmpose/apis/inferencers/utils/default_det_models.py @@ -0,0 +1,21 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
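+# Default object detectors used by top-down pose inferencers, keyed by
+# instance type ('human'/'body'/'wholebody', 'face', 'hand', 'animal').
+# Each entry holds the detection config (or model name), its checkpoint
+# and the category ids to keep from the detection results.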
+default_det_models = dict(
+    human=dict(model='rtmdet-s', weights=None, cat_ids=(0, )),
+    face=dict(
+        model='demo/mmdetection_cfg/yolox-s_8xb8-300e_coco-face.py',
+        weights='https://download.openmmlab.com/mmpose/mmdet_pretrained/'
+        'yolo-x_8xb8-300e_coco-face_13274d7c.pth',
+        cat_ids=(0, )),
+    hand=dict(
+        model='demo/mmdetection_cfg/cascade_rcnn_x101_64x4d_fpn_1class.py',
+        weights='https://download.openmmlab.com/mmpose/mmdet_pretrained/'
+        'cascade_rcnn_x101_64x4d_fpn_20e_onehand10k-dac19597_20201030.pth',
+        cat_ids=(0, )),
+    animal=dict(
+        model='rtmdet-s',
+        weights=None,
+        cat_ids=(15, 16, 17, 18, 19, 20, 21, 22, 23)),
+)
+
+default_det_models['body'] = default_det_models['human']
+default_det_models['wholebody'] = default_det_models['human']
diff --git a/mmpose/registry.py b/mmpose/registry.py
index 78bd75c64b..f0e46a315f 100644
--- a/mmpose/registry.py
+++ b/mmpose/registry.py
@@ -11,6 +11,7 @@
 from mmengine.registry import DATASETS as MMENGINE_DATASETS
 from mmengine.registry import EVALUATOR as MMENGINE_EVALUATOR
 from mmengine.registry import HOOKS as MMENGINE_HOOKS
+from mmengine.registry import INFERENCERS as MMENGINE_INFERENCERS
 from mmengine.registry import LOG_PROCESSORS as MMENGINE_LOG_PROCESSORS
 from mmengine.registry import LOOPS as MMENGINE_LOOPS
 from mmengine.registry import METRICS as MMENGINE_METRICS
@@ -125,3 +126,9 @@
 # manager keypoint encoder/decoder
 KEYPOINT_CODECS = Registry('KEYPOINT_CODECS', locations=['mmpose.codecs'])
+
+# manage inferencer
+INFERENCERS = Registry(
+    'inferencer',
+    parent=MMENGINE_INFERENCERS,
+    locations=['mmpose.apis.inferencers'])
diff --git a/mmpose/visualization/local_visualizer.py b/mmpose/visualization/local_visualizer.py
index 2deb20a14e..8801336185 100644
--- a/mmpose/visualization/local_visualizer.py
+++ b/mmpose/visualization/local_visualizer.py
@@ -479,3 +479,5 @@ def add_datasample(self,
         else:
             # save drawn_img to backends
             self.add_image(name, drawn_img, step)
+
+        return self.get_image()
diff --git a/tests/test_apis/test_inferencers/test_pose2d_inferencer.py b/tests/test_apis/test_inferencers/test_pose2d_inferencer.py
new file mode 100644
index 0000000000..77aee38f00
--- /dev/null
+++ b/tests/test_apis/test_inferencers/test_pose2d_inferencer.py
@@ -0,0 +1,112 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+import os.path as osp
+from tempfile import TemporaryDirectory
+from unittest import TestCase
+
+import mmcv
+import torch
+from mmengine.infer.infer import BaseInferencer
+
+from mmpose.apis.inferencers import Pose2DInferencer
+from mmpose.structures import PoseDataSample
+
+
+class TestPose2DInferencer(TestCase):
+
+    def _test_init(self):
+
+        # 1. init with config path and checkpoint
+        inferencer = Pose2DInferencer(
+            model='configs/body_2d_keypoint/simcc/coco/'
+            'simcc_res50_8xb64-210e_coco-256x192.py',
+            weights='https://download.openmmlab.com/mmpose/'
+            'v1/body_2d_keypoint/simcc/coco/'
+            'simcc_res50_8xb64-210e_coco-256x192-8e0f5b59_20220919.pth',
+        )
+        self.assertIsInstance(inferencer.model, torch.nn.Module)
+        self.assertIsInstance(inferencer.detector, BaseInferencer)
+        self.assertSequenceEqual(inferencer.det_cat_ids, (0, ))
+
+        # 2. init with config name
+        inferencer = Pose2DInferencer(
+            model='td-hm_res50_8xb32-210e_onehand10k-256x256')
+        self.assertIsInstance(inferencer.model, torch.nn.Module)
+        self.assertIsInstance(inferencer.detector, BaseInferencer)
+        self.assertSequenceEqual(inferencer.det_cat_ids, (0, ))
+
+        # 3.
init with alias + with self.assertWarnsRegex( + Warning, 'dataset_meta are not saved in ' + 'the checkpoint\'s meta data, load via config.'): + inferencer = Pose2DInferencer(model='animal') + self.assertIsInstance(inferencer.model, torch.nn.Module) + self.assertIsInstance(inferencer.detector, BaseInferencer) + self.assertSequenceEqual(inferencer.det_cat_ids, + (15, 16, 17, 18, 19, 20, 21, 22, 23)) + + # 4. init with bottom-up model + inferencer = Pose2DInferencer( + model='configs/body_2d_keypoint/dekr/coco/' + 'dekr_hrnet-w32_8xb10-140e_coco-512x512.py', + weights='https://download.openmmlab.com/mmpose/v1/' + 'body_2d_keypoint/dekr/coco/' + 'dekr_hrnet-w32_8xb10-140e_coco-512x512_ac7c17bf-20221228.pth', + ) + self.assertIsInstance(inferencer.model, torch.nn.Module) + self.assertFalse(hasattr(inferencer, 'detector')) + + # init with incorrect `instance_type` + with self.assertRaises(ValueError): + inferencer = Pose2DInferencer( + model='animal', instance_type='aminal') + + def test_call(self): + + # top-down model + inferencer = Pose2DInferencer('human') + + img_path = 'tests/data/coco/000000197388.jpg' + img = mmcv.imread(img_path) + + # `inputs` is path to an image + inputs = img_path + results1 = inferencer(inputs, return_vis=True) + self.assertIn('visualization', results1) + self.assertSequenceEqual(results1['visualization'][0].shape, img.shape) + self.assertIn('predictions', results1) + self.assertIn('keypoints', results1['predictions'][0][0]) + self.assertEqual(len(results1['predictions'][0][0]['keypoints']), 17) + + # `inputs` is an image array + inputs = img + results2 = inferencer(inputs) + self.assertEqual( + len(results1['predictions'][0]), len(results2['predictions'][0])) + self.assertSequenceEqual(results1['predictions'][0][0]['keypoints'], + results2['predictions'][0][0]['keypoints']) + results2 = inferencer(inputs, return_datasamples=True) + self.assertIsInstance(results2['predictions'][0], PoseDataSample) + + # `inputs` is path to a directory + inputs = osp.dirname(img_path) + with TemporaryDirectory() as tmp_dir: + # only save visualizations + results3 = inferencer(inputs, vis_out_dir=tmp_dir) + self.assertEqual(len(os.listdir(tmp_dir)), 4) + # save both visualizations and predictions + results3 = inferencer(inputs, out_dir=tmp_dir) + self.assertEqual(len(os.listdir(tmp_dir)), 8) + self.assertEqual(len(results3['predictions']), 4) + self.assertSequenceEqual(results1['predictions'][0][0]['keypoints'], + results3['predictions'][3][0]['keypoints']) + + # `inputs` is path to a video + inputs = 'tests/data/posetrack18/videos/000001_mpiinew_test/' \ + '000001_mpiinew_test.mp4' + with TemporaryDirectory() as tmp_dir: + results = inferencer(inputs, out_dir=tmp_dir) + self.assertIn('000001_mpiinew_test.mp4', os.listdir(tmp_dir)) + self.assertIn('000001_mpiinew_test.json', os.listdir(tmp_dir)) + self.assertTrue(inferencer.video_input) + self.assertIn(len(results['predictions']), (4, 5)) From 7fc4eae25927f9fd232a7443647e9272e0d9cd02 Mon Sep 17 00:00:00 2001 From: lupeng Date: Mon, 13 Feb 2023 21:31:43 +0800 Subject: [PATCH 03/17] add MMPoseInferencer --- demo/inferencer_demo.py | 109 +++++++ mmpose/apis/__init__.py | 5 +- mmpose/apis/inferencers/__init__.py | 3 +- mmpose/apis/inferencers/mmpose_inferencer.py | 281 ++++++++++++++++++ mmpose/apis/inferencers/pose2d_inferencer.py | 37 +-- .../test_mmpose_inferencer.py | 65 ++++ .../test_pose2d_inferencer.py | 11 +- 7 files changed, 483 insertions(+), 28 deletions(-) create mode 100644 demo/inferencer_demo.py create mode 100644 
tests/test_apis/test_inferencers/test_mmpose_inferencer.py diff --git a/demo/inferencer_demo.py b/demo/inferencer_demo.py new file mode 100644 index 0000000000..6cded95dc9 --- /dev/null +++ b/demo/inferencer_demo.py @@ -0,0 +1,109 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from argparse import ArgumentParser + +from mmpose.apis.inferencers import MMPoseInferencer + + +def parse_args(): + parser = ArgumentParser() + parser.add_argument( + 'inputs', type=str, help='Input image/video path or folder path.') + parser.add_argument( + '--pose2d', + type=str, + default=None, + help='Pretrained 2D pose estimation algorithm. It\'s the path to the ' + 'config file or the model name defined in metafile.') + parser.add_argument( + '--pose2d-weights', + type=str, + default=None, + help='Path to the custom checkpoint file of the selected pose model. ' + 'If it is not specified and "rec" is a model name of metafile, the ' + 'weights will be loaded from metafile.') + parser.add_argument( + '--instance-type', + type=str, + default=None, + help='The name of the target instances, such as' + '"human", "hand", "animal", and etc.') + parser.add_argument( + '--det-model', + type=str, + default=None, + help='Path to the config of detection model.') + parser.add_argument( + '--det-weights', + type=str, + default=None, + help='Path to the checkpoints of detection model.') + parser.add_argument( + '--det-cat-ids', + type=int, + nargs='+', + default=None, + help='Category id for detection model.') + parser.add_argument( + '--device', + type=str, + default=None, + help='Device used for inference. ' + 'If not specified, the available device will be automatically used.') + parser.add_argument( + '--show', + action='store_true', + help='Display the video in a popup window.') + parser.add_argument( + '--bbox-thr', + type=float, + default=0.3, + help='Bounding box score threshold') + parser.add_argument( + '--nms-thr', + type=float, + default=0.3, + help='IoU threshold for bounding box NMS') + parser.add_argument( + '--kpt-thr', type=float, default=0.3, help='Keypoint score threshold') + parser.add_argument( + '--radius', + type=int, + default=3, + help='Keypoint radius for visualization') + parser.add_argument( + '--thickness', + type=int, + default=1, + help='Link thickness for visualization') + parser.add_argument( + '--vis-out-dir', + type=str, + default='', + help='Output directory of visualization results.') + parser.add_argument( + '--pred-out-dir', + type=str, + default='', + help='Output directory to save the inference results.') + + call_args = vars(parser.parse_args()) + + init_kws = [ + 'pose2d', 'pose2d_weights', 'device', 'instance_type', 'det_model', + 'det_weights', 'det_cat_ids' + ] + init_args = {} + for init_kw in init_kws: + init_args[init_kw] = call_args.pop(init_kw) + + return init_args, call_args + + +def main(): + init_args, call_args = parse_args() + mmaction2 = MMPoseInferencer(**init_args) + mmaction2(**call_args) + + +if __name__ == '__main__': + main() diff --git a/mmpose/apis/__init__.py b/mmpose/apis/__init__.py index 8ac662fa17..ff7149e453 100644 --- a/mmpose/apis/__init__.py +++ b/mmpose/apis/__init__.py @@ -1,7 +1,8 @@ # Copyright (c) OpenMMLab. All rights reserved. 
from .inference import inference_bottomup, inference_topdown, init_model -from .inferencers import Pose2DInferencer +from .inferencers import MMPoseInferencer, Pose2DInferencer __all__ = [ - 'init_model', 'inference_topdown', 'inference_bottomup', 'Pose2DInferencer' + 'init_model', 'inference_topdown', 'inference_bottomup', + 'Pose2DInferencer', 'MMPoseInferencer' ] diff --git a/mmpose/apis/inferencers/__init__.py b/mmpose/apis/inferencers/__init__.py index 776c4d4952..3c21a02e08 100644 --- a/mmpose/apis/inferencers/__init__.py +++ b/mmpose/apis/inferencers/__init__.py @@ -1,4 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. +from .mmpose_inferencer import MMPoseInferencer from .pose2d_inferencer import Pose2DInferencer -__all__ = ['Pose2DInferencer'] +__all__ = ['Pose2DInferencer', 'MMPoseInferencer'] diff --git a/mmpose/apis/inferencers/mmpose_inferencer.py b/mmpose/apis/inferencers/mmpose_inferencer.py index ef101fec61..541a2e8f19 100644 --- a/mmpose/apis/inferencers/mmpose_inferencer.py +++ b/mmpose/apis/inferencers/mmpose_inferencer.py @@ -1 +1,282 @@ # Copyright (c) OpenMMLab. All rights reserved. +import mimetypes +import os +import tempfile +from collections import defaultdict +from typing import Dict, List, Optional, Sequence, Union + +import mmcv +import numpy as np +from mmengine.config import Config, ConfigDict +from mmengine.fileio import (get_file_backend, isdir, join_path, + list_dir_or_file) +from mmengine.infer.infer import BaseInferencer, ModelType +from mmengine.structures import InstanceData + +from mmpose.structures import PoseDataSample +from .pose2d_inferencer import Pose2DInferencer + +InstanceList = List[InstanceData] +InputType = Union[str, np.ndarray] +InputsType = Union[InputType, Sequence[InputType]] +PredType = Union[InstanceData, InstanceList] +ImgType = Union[np.ndarray, Sequence[np.ndarray]] +ConfigType = Union[Config, ConfigDict] +ResType = Union[Dict, List[Dict], InstanceData, List[InstanceData]] + + +class MMPoseInferencer(BaseInferencer): + """MMPose Inferencer. It's a unified inferencer interface for pose + estimation task, currently including: Pose2D. and it can be used to perform + 2D keypoint detection. + + Args: + pose2d (str, optional): Pretrained 2D pose estimation algorithm. + It's the path to the config file or the model name defined in + metafile. For example, it could be: + + - model alias, e.g. ``'body'``, + - config name, e.g. ``'simcc_res50_8xb64-210e_coco-256x192'``, + - config path + + Defaults to ``None``. + pose2d_weights (str, optional): Path to the custom checkpoint file of + the selected rec model. If it is not specified and "pose2d" is a + model name of metafile, the weights will be loaded from metafile. + Defaults to None. + device (str, optional): Device to run inference. If None, the + available device will be automatically used. Defaults to None. + scope (str, optional): The scope of the model. Defaults to "mmpose". + instance_type (str, optional): The name of the instances, such as + "human", "hand", "animal", and etc. This argument works as the + alias for the detection models, which will be utilized in + topdown methods. Defaults to None. + det_model(str, optional): Path to the config of detection model. + Defaults to None. + det_weights(str, optional): Path to the checkpoints of detection + model. Defaults to None. + det_cat_ids(int or list[int], optional): Category id for + detection model. Defaults to None. 
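+
+    Example (illustrative; ``'human'`` is the model alias registered in the
+    metafiles and the image path is taken from the unit tests):
+        >>> from mmpose.apis import MMPoseInferencer
+        >>> inferencer = MMPoseInferencer(pose2d='human')
+        >>> results = inferencer(
+        ...     'tests/data/coco/000000197388.jpg', return_vis=True)
+        >>> results['predictions'][0][0]['keypoints']  # first instance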
+ """ + + preprocess_kwargs: set = {'bbox_thr', 'nms_thr'} + forward_kwargs: set = set() + visualize_kwargs: set = { + 'return_vis', + 'show', + 'wait_time', + 'radius', + 'thickness', + 'kpt_thr', + 'vis_out_dir', + } + postprocess_kwargs: set = {'pred_out_dir'} + + def __init__(self, + pose2d: Optional[str] = None, + pose2d_weights: Optional[str] = None, + device: Optional[str] = None, + scope: str = 'mmpose', + instance_type: Optional[str] = None, + det_model: Optional[Union[ModelType, str]] = None, + det_weights: Optional[str] = None, + det_cat_ids: Optional[Union[int, List]] = None) -> None: + + if pose2d is None: + raise ValueError('2d pose estimation algorithm should provided.') + + self.visualizer = None + if pose2d is not None: + self.pose2d_inferencer = Pose2DInferencer(pose2d, pose2d_weights, + device, scope, + instance_type, det_model, + det_weights, det_cat_ids) + self.mode = 'pose2d' + + def _init_pipeline(self, cfg: ConfigType) -> None: + pass + + def forward(self, inputs: InputType, batch_size: int, + **forward_kwargs) -> PredType: + """Forward the inputs to the model. + + Args: + inputs (InputsType): The inputs to be forwarded. + batch_size (int): Batch size. Defaults to 1. + + Returns: + Dict: The prediction results. Possibly with keys "pose2d". + """ + result = {} + if self.mode == 'pose2d': + predictions = self.pose2d_inferencer( + inputs, + return_datasample=True, + return_vis=False, + batch_size=batch_size, + **forward_kwargs)['predictions'] + result['pose2d'] = predictions + + return result + + def __call__( + self, + inputs: InputsType, + return_datasample: bool = False, + batch_size: int = 1, + out_dir: Optional[str] = None, + **kwargs, + ) -> dict: + """Call the inferencer. + + Args: + inputs (InputsType): Inputs for the inferencer. + return_datasample (bool): Whether to return results as + :obj:`BaseDataElement`. Defaults to False. + batch_size (int): Batch size. Defaults to 1. + out_dir (str, optional): directory to save visualization + results and predictions. Will be overoden if vis_out_dir or + pred_out_dir are given. Defaults to None + **kwargs: Key words arguments passed to :meth:`preprocess`, + :meth:`forward`, :meth:`visualize` and :meth:`postprocess`. + Each key in kwargs should be in the corresponding set of + ``preprocess_kwargs``, ``forward_kwargs``, + ``visualize_kwargs`` and ``postprocess_kwargs``. + + Returns: + dict: Inference and visualization results. + """ + if out_dir is not None: + if 'vis_out_dir' not in kwargs: + kwargs['vis_out_dir'] = f'{out_dir}/visualizations' + if 'pred_out_dir' not in kwargs: + kwargs['pred_out_dir'] = f'{out_dir}/predictions' + ( + preprocess_kwargs, + forward_kwargs, + visualize_kwargs, + postprocess_kwargs, + ) = self._dispatch_kwargs(**kwargs) + + ori_inputs = self._inputs_to_list(inputs) + + preds = self.forward(ori_inputs, batch_size, **preprocess_kwargs, + **forward_kwargs) + + visualization = self.visualize(ori_inputs, preds, **visualize_kwargs) + results = self.postprocess(preds, visualization, return_datasample, + **postprocess_kwargs) + return results + + def _inputs_to_list(self, inputs: InputsType) -> list: + """Preprocess the inputs to a list. + + Preprocess inputs to a list according to its type: + + - list or tuple: return inputs + - str: + - Directory path: return all files in the directory + - other cases: return a list containing the string. The string + could be a path to file, a url or other types of string + according to the task. + + Args: + inputs (InputsType): Inputs for the inferencer. 
+ + Returns: + list: List of input for the :meth:`preprocess`. + """ + if isinstance(inputs, str): + backend = get_file_backend(inputs) + if hasattr(backend, 'isdir') and isdir(inputs): + # Backends like HttpsBackend do not implement `isdir`, so only + # those backends that implement `isdir` could accept the + # inputs as a directory + filepath_list = [ + join_path(inputs, fname) + for fname in list_dir_or_file(inputs, list_dir=False) + ] + inputs = [] + for filepath in filepath_list: + input_type = mimetypes.guess_type(filepath)[0].split( + '/')[0] + if input_type == 'image': + inputs.append(filepath) + inputs = inputs + else: + # if inputs is a path to a video file, it will be converted + # to a list containing separated frame filenames + input_type = mimetypes.guess_type(inputs)[0].split('/')[0] + if input_type == 'video': + if hasattr(self, 'pose2d_inferencer'): + self.pose2d_inferencer.video_input = True + # split video frames into a temperory folder + tmp_folder = tempfile.TemporaryDirectory() + video = mmcv.VideoReader(inputs) + self.pose2d_inferencer.video_info = dict( + fps=video.fps, + name=os.path.basename(inputs), + tmp_folder=tmp_folder) + video.cvt2frames(tmp_folder.name, show_progress=False) + frames = sorted(os.listdir(tmp_folder.name)) + inputs = [os.path.join(tmp_folder.name, f) for f in frames] + + if not isinstance(inputs, (list, tuple)): + inputs = [inputs] + + return list(inputs) + + def visualize(self, inputs: InputsType, preds: PredType, + **kwargs) -> List[np.ndarray]: + """Visualize predictions. + + Args: + inputs (list): Inputs preprocessed by :meth:`_inputs_to_list`. + preds (Any): Predictions of the model. + return_vis (bool): Whether to return images with predicted results. + show (bool): Whether to display the image in a popup window. + Defaults to False. + show_interval (int): The interval of show (s). Defaults to 0 + radius (int): Keypoint radius for visualization. Defaults to 3 + thickness (int): Link thickness for visualization. Defaults to 1 + kpt_thr (float): The threshold to visualize the keypoints. + Defaults to 0.3 + vis_out_dir (str, optional): directory to save visualization + results w/o predictions. If left as empty, no file will + be saved. Defaults to ''. + + Returns: + List[np.ndarray]: Visualization results. + """ + + if 'pose2d' in self.mode: + return self.pose2d_inferencer.visualize(inputs, preds['pose2d'], + **kwargs) + + def postprocess(self, + preds: List[PoseDataSample], + visualization: List[np.ndarray], + return_datasample=False, + pred_out_dir: str = '', + **kwargs) -> dict: + """Simply apply postprocess of pose2d_inferencer. + + Args: + preds (List[Dict]): Predictions of the model. + visualization (np.ndarray): Visualized predictions. + return_datasample (bool): Whether to return results as datasamples. + Defaults to False. + pred_out_dir (str): Directory to save the inference results w/o + visualization. If left as empty, no file will be saved. + Defaults to ''. 
+ """ + result_dict = defaultdict(list) + + result_dict['visualization'] = visualization + + if 'pose2d' in self.mode: + result_dict['predictions'] = self.pose2d_inferencer.postprocess( + preds['pose2d'], visualization, return_datasample, + pred_out_dir, **kwargs)['predictions'] + + return result_dict diff --git a/mmpose/apis/inferencers/pose2d_inferencer.py b/mmpose/apis/inferencers/pose2d_inferencer.py index 1b16fcf391..34f6f46602 100644 --- a/mmpose/apis/inferencers/pose2d_inferencer.py +++ b/mmpose/apis/inferencers/pose2d_inferencer.py @@ -77,10 +77,7 @@ class Pose2DInferencer(BaseInferencer): 'kpt_thr', 'vis_out_dir', } - postprocess_kwargs: set = { - 'pred_out_dir', - 'return_datasample', - } + postprocess_kwargs: set = {'pred_out_dir'} def __init__(self, model: Union[ModelType, str], @@ -295,7 +292,7 @@ def forward(self, inputs: Union[dict, tuple]): def __call__( self, inputs: InputsType, - return_datasamples: bool = False, + return_datasample: bool = False, batch_size: int = 1, out_dir: Optional[str] = None, **kwargs, @@ -304,10 +301,10 @@ def __call__( Args: inputs (InputsType): Inputs for the inferencer. - return_datasamples (bool): Whether to return results as + return_datasample (bool): Whether to return results as :obj:`BaseDataElement`. Defaults to False. batch_size (int): Batch size. Defaults to 1. - vis_out_dir (str, optional): directory to save visualization + out_dir (str, optional): directory to save visualization results and predictions. Will be overoden if vis_out_dir or pred_out_dir are given. Defaults to None **kwargs: Key words arguments passed to :meth:`preprocess`, @@ -321,9 +318,9 @@ def __call__( """ if out_dir is not None: if 'vis_out_dir' not in kwargs: - kwargs['vis_out_dir'] = out_dir + kwargs['vis_out_dir'] = f'{out_dir}/visualizations' if 'pred_out_dir' not in kwargs: - kwargs['pred_out_dir'] = out_dir + kwargs['pred_out_dir'] = f'{out_dir}/predictions' ( preprocess_kwargs, @@ -340,10 +337,8 @@ def __call__( inputs = track(inputs, description='Inference') for data in inputs: preds.extend(self.forward(data, **forward_kwargs)) - visualization = self.visualize( - ori_inputs, preds, - **visualize_kwargs) # type: ignore # noqa: E501 - results = self.postprocess(preds, visualization, return_datasamples, + visualization = self.visualize(ori_inputs, preds, **visualize_kwargs) + results = self.postprocess(preds, visualization, return_datasample, **postprocess_kwargs) return results @@ -356,7 +351,7 @@ def visualize(self, radius: int = 3, thickness: int = 1, kpt_thr: float = 0.3, - vis_out_dir: Optional[str] = None) -> List[np.ndarray]: + vis_out_dir: str = '') -> List[np.ndarray]: """Visualize predictions. Args: @@ -371,7 +366,8 @@ def visualize(self, kpt_thr (float): The threshold to visualize the keypoints. Defaults to 0.3 vis_out_dir (str, optional): directory to save visualization - results. Defaults to None. + results w/o predictions. If left as empty, no file will + be saved. Defaults to ''. Returns: List[np.ndarray]: Visualization results. 
@@ -420,6 +416,7 @@ def visualize(self, if self.video_input and vis_out_dir: # merge saved frame images into video + os.makedirs(vis_out_dir, exist_ok=True) mmcv.frames2video( self.video_info['tmp_folder'].name, f'{vis_out_dir}/{self.video_info["name"]}', @@ -475,14 +472,12 @@ def postprocess( pred = split_instances(pred.pred_instances) result_dict['predictions'].append(pred) - if self.video_input: - result_dict['predictions'] = [{ - 'frame_id': i, - 'instances': pred - } for i, pred in enumerate(result_dict['predictions'])] - if pred_out_dir != '': if self.video_input: + result_dict['predictions'] = [{ + 'frame_id': i, + 'instances': pred + } for i, pred in enumerate(result_dict['predictions'])] fname = os.path.splitext(self.video_info['name'])[0] + '.json' mmengine.dump(result_dict['predictions'], join_path(pred_out_dir, fname)) diff --git a/tests/test_apis/test_inferencers/test_mmpose_inferencer.py b/tests/test_apis/test_inferencers/test_mmpose_inferencer.py new file mode 100644 index 0000000000..03c649982c --- /dev/null +++ b/tests/test_apis/test_inferencers/test_mmpose_inferencer.py @@ -0,0 +1,65 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os +import os.path as osp +from tempfile import TemporaryDirectory +from unittest import TestCase + +import mmcv + +from mmpose.apis.inferencers import MMPoseInferencer +from mmpose.structures import PoseDataSample + + +class TestMMPoseInferencer(TestCase): + + def test_call(self): + + # top-down model + inferencer = MMPoseInferencer('human') + + img_path = 'tests/data/coco/000000197388.jpg' + img = mmcv.imread(img_path) + + # `inputs` is path to an image + inputs = img_path + results1 = inferencer(inputs, return_vis=True) + self.assertIn('visualization', results1) + self.assertSequenceEqual(results1['visualization'][0].shape, img.shape) + self.assertIn('predictions', results1) + self.assertIn('keypoints', results1['predictions'][0][0]) + self.assertEqual(len(results1['predictions'][0][0]['keypoints']), 17) + + # `inputs` is an image array + inputs = img + results2 = inferencer(inputs) + self.assertEqual( + len(results1['predictions'][0]), len(results2['predictions'][0])) + self.assertSequenceEqual(results1['predictions'][0][0]['keypoints'], + results2['predictions'][0][0]['keypoints']) + results2 = inferencer(inputs, return_datasample=True) + self.assertIsInstance(results2['predictions'][0], PoseDataSample) + + # `inputs` is path to a directory + inputs = osp.dirname(img_path) + with TemporaryDirectory() as tmp_dir: + # only save visualizations + results3 = inferencer(inputs, vis_out_dir=tmp_dir) + self.assertEqual(len(os.listdir(tmp_dir)), 4) + # save both visualizations and predictions + results3 = inferencer(inputs, out_dir=tmp_dir) + self.assertEqual(len(os.listdir(f'{tmp_dir}/visualizations')), 4) + self.assertEqual(len(os.listdir(f'{tmp_dir}/predictions')), 4) + self.assertEqual(len(results3['predictions']), 4) + self.assertSequenceEqual(results1['predictions'][0][0]['keypoints'], + results3['predictions'][3][0]['keypoints']) + + # `inputs` is path to a video + inputs = 'tests/data/posetrack18/videos/000001_mpiinew_test/' \ + '000001_mpiinew_test.mp4' + with TemporaryDirectory() as tmp_dir: + results = inferencer(inputs, out_dir=tmp_dir) + self.assertIn('000001_mpiinew_test.mp4', + os.listdir(f'{tmp_dir}/visualizations')) + self.assertIn('000001_mpiinew_test.json', + os.listdir(f'{tmp_dir}/predictions')) + self.assertIn(len(results['predictions']), (4, 5)) diff --git a/tests/test_apis/test_inferencers/test_pose2d_inferencer.py 
b/tests/test_apis/test_inferencers/test_pose2d_inferencer.py index 77aee38f00..b72a870a8b 100644 --- a/tests/test_apis/test_inferencers/test_pose2d_inferencer.py +++ b/tests/test_apis/test_inferencers/test_pose2d_inferencer.py @@ -85,7 +85,7 @@ def test_call(self): len(results1['predictions'][0]), len(results2['predictions'][0])) self.assertSequenceEqual(results1['predictions'][0][0]['keypoints'], results2['predictions'][0][0]['keypoints']) - results2 = inferencer(inputs, return_datasamples=True) + results2 = inferencer(inputs, return_datasample=True) self.assertIsInstance(results2['predictions'][0], PoseDataSample) # `inputs` is path to a directory @@ -96,7 +96,8 @@ def test_call(self): self.assertEqual(len(os.listdir(tmp_dir)), 4) # save both visualizations and predictions results3 = inferencer(inputs, out_dir=tmp_dir) - self.assertEqual(len(os.listdir(tmp_dir)), 8) + self.assertEqual(len(os.listdir(f'{tmp_dir}/visualizations')), 4) + self.assertEqual(len(os.listdir(f'{tmp_dir}/predictions')), 4) self.assertEqual(len(results3['predictions']), 4) self.assertSequenceEqual(results1['predictions'][0][0]['keypoints'], results3['predictions'][3][0]['keypoints']) @@ -106,7 +107,9 @@ def test_call(self): '000001_mpiinew_test.mp4' with TemporaryDirectory() as tmp_dir: results = inferencer(inputs, out_dir=tmp_dir) - self.assertIn('000001_mpiinew_test.mp4', os.listdir(tmp_dir)) - self.assertIn('000001_mpiinew_test.json', os.listdir(tmp_dir)) + self.assertIn('000001_mpiinew_test.mp4', + os.listdir(f'{tmp_dir}/visualizations')) + self.assertIn('000001_mpiinew_test.json', + os.listdir(f'{tmp_dir}/predictions')) self.assertTrue(inferencer.video_input) self.assertIn(len(results['predictions']), (4, 5)) From 0c67427b5115c4fda8adcee90e719b55a6e77c1f Mon Sep 17 00:00:00 2001 From: lupeng Date: Tue, 14 Feb 2023 21:02:43 +0800 Subject: [PATCH 04/17] modify default detectors --- .../inferencers/utils/default_det_models.py | 29 +++++++++++++++---- .../test_hooks/test_visualization_hook.py | 2 +- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/mmpose/apis/inferencers/utils/default_det_models.py b/mmpose/apis/inferencers/utils/default_det_models.py index 818c4c4260..96fda2cf14 100644 --- a/mmpose/apis/inferencers/utils/default_det_models.py +++ b/mmpose/apis/inferencers/utils/default_det_models.py @@ -1,19 +1,38 @@ # Copyright (c) OpenMMLab. All rights reserved. 
+import os.path as osp + +from mmengine.config.utils import MODULE2PACKAGE +from mmengine.utils import get_installed_path + +mmpose_path = get_installed_path(MODULE2PACKAGE['mmpose']) + default_det_models = dict( - human=dict(model='rtmdet-s', weights=None, cat_ids=(0, )), + human=dict( + model=osp.join(mmpose_path, '.mim', + 'demo/mmdetection_cfg/faster_rcnn_r50_fpn_coco.py'), + weights='https://download.openmmlab.com/mmdetection/v2.0/' + 'faster_rcnn/faster_rcnn_r50_fpn_1x_coco/' + 'faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth', + cat_ids=(0, )), face=dict( - model='demo/mmdetection_cfg/yolox-s_8xb8-300e_coco-face.py', + model=osp.join(mmpose_path, '.mim', + 'demo/mmdetection_cfg/yolox-s_8xb8-300e_coco-face.py'), weights='https://download.openmmlab.com/mmpose/mmdet_pretrained/' 'yolo-x_8xb8-300e_coco-face_13274d7c.pth', cat_ids=(0, )), hand=dict( - model='demo/mmdetection_cfg/cascade_rcnn_x101_64x4d_fpn_1class.py', + model=osp.join( + mmpose_path, '.mim', + 'demo/mmdetection_cfg/cascade_rcnn_x101_64x4d_fpn_1class.py'), weights='https://download.openmmlab.com/mmpose/mmdet_pretrained/' 'cascade_rcnn_x101_64x4d_fpn_20e_onehand10k-dac19597_20201030.pth', cat_ids=(0, )), animal=dict( - model='rtmdet-s', - weights=None, + model=osp.join(mmpose_path, '.mim', + 'demo/mmdetection_cfg/faster_rcnn_r50_fpn_coco.py'), + weights='https://download.openmmlab.com/mmdetection/v2.0/' + 'faster_rcnn/faster_rcnn_r50_fpn_1x_coco/' + 'faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth', cat_ids=(15, 16, 17, 18, 19, 20, 21, 22, 23)), ) diff --git a/tests/test_engine/test_hooks/test_visualization_hook.py b/tests/test_engine/test_hooks/test_visualization_hook.py index 8eab9670de..3e4a202198 100644 --- a/tests/test_engine/test_hooks/test_visualization_hook.py +++ b/tests/test_engine/test_hooks/test_visualization_hook.py @@ -27,7 +27,7 @@ def _rand_poses(num_boxes, h, w): class TestVisualizationHook(TestCase): def setUp(self) -> None: - PoseLocalVisualizer.get_instance('visualizer') + PoseLocalVisualizer.get_instance('test_visualization_hook') data_sample = PoseDataSample() data_sample.set_metainfo({ From bcddb35513750b94e758cb533e1674a04bb54fc8 Mon Sep 17 00:00:00 2001 From: lupeng Date: Fri, 17 Feb 2023 20:53:39 +0800 Subject: [PATCH 05/17] support webcam input --- demo/inferencer_demo.py | 19 +- .../inferencers/base_mmpose_inferencer.py | 444 ++++++++++++++++++ mmpose/apis/inferencers/mmpose_inferencer.py | 225 +++++---- mmpose/apis/inferencers/pose2d_inferencer.py | 435 ++++------------- .../test_mmpose_inferencer.py | 23 +- .../test_pose2d_inferencer.py | 25 +- 6 files changed, 699 insertions(+), 472 deletions(-) create mode 100644 mmpose/apis/inferencers/base_mmpose_inferencer.py diff --git a/demo/inferencer_demo.py b/demo/inferencer_demo.py index 6cded95dc9..5622d05b7e 100644 --- a/demo/inferencer_demo.py +++ b/demo/inferencer_demo.py @@ -19,8 +19,8 @@ def parse_args(): type=str, default=None, help='Path to the custom checkpoint file of the selected pose model. 
' - 'If it is not specified and "rec" is a model name of metafile, the ' - 'weights will be loaded from metafile.') + 'If it is not specified and "pose2d" is a model name of metafile, ' + 'the weights will be loaded from metafile.') parser.add_argument( '--instance-type', type=str, @@ -52,7 +52,7 @@ def parse_args(): parser.add_argument( '--show', action='store_true', - help='Display the video in a popup window.') + help='Display the image/video in a popup window.') parser.add_argument( '--bbox-thr', type=float, @@ -69,22 +69,22 @@ def parse_args(): '--radius', type=int, default=3, - help='Keypoint radius for visualization') + help='Keypoint radius for visualization.') parser.add_argument( '--thickness', type=int, default=1, - help='Link thickness for visualization') + help='Link thickness for visualization.') parser.add_argument( '--vis-out-dir', type=str, default='', - help='Output directory of visualization results.') + help='Directory for saving visualized results.') parser.add_argument( '--pred-out-dir', type=str, default='', - help='Output directory to save the inference results.') + help='Directory for saving inference results.') call_args = vars(parser.parse_args()) @@ -101,8 +101,9 @@ def parse_args(): def main(): init_args, call_args = parse_args() - mmaction2 = MMPoseInferencer(**init_args) - mmaction2(**call_args) + inferencer = MMPoseInferencer(**init_args) + for _ in inferencer(**call_args): + pass if __name__ == '__main__': diff --git a/mmpose/apis/inferencers/base_mmpose_inferencer.py b/mmpose/apis/inferencers/base_mmpose_inferencer.py new file mode 100644 index 0000000000..92df49764c --- /dev/null +++ b/mmpose/apis/inferencers/base_mmpose_inferencer.py @@ -0,0 +1,444 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import mimetypes +import os +import shutil +import tempfile +import warnings +from collections import defaultdict +from typing import (Any, Callable, Dict, Generator, List, Optional, Sequence, + Union) + +import cv2 +import mmcv +import mmengine +import numpy as np +import torch.nn as nn +from mmengine.config import Config, ConfigDict +from mmengine.dataset import Compose +from mmengine.fileio import (get_file_backend, isdir, join_path, + list_dir_or_file) +from mmengine.infer.infer import BaseInferencer +from mmengine.runner.checkpoint import _load_checkpoint_to_model +from mmengine.structures import InstanceData + +from mmpose.apis.inference import dataset_meta_from_config +from mmpose.structures import PoseDataSample, split_instances + +InstanceList = List[InstanceData] +InputType = Union[str, np.ndarray] +InputsType = Union[InputType, Sequence[InputType]] +PredType = Union[InstanceData, InstanceList] +ImgType = Union[np.ndarray, Sequence[np.ndarray]] +ConfigType = Union[Config, ConfigDict] +ResType = Union[Dict, List[Dict], InstanceData, List[InstanceData]] + + +class BaseMMPoseInferencer(BaseInferencer): + """The base class for MMPose inferencers.""" + + preprocess_kwargs: set = {'bbox_thr', 'nms_thr'} + forward_kwargs: set = set() + visualize_kwargs: set = { + 'return_vis', + 'show', + 'wait_time', + 'radius', + 'thickness', + 'kpt_thr', + 'vis_out_dir', + } + postprocess_kwargs: set = {'pred_out_dir'} + + def _load_weights_to_model(self, model: nn.Module, + checkpoint: Optional[dict], + cfg: Optional[ConfigType]) -> None: + """Loading model weights and meta information from cfg and checkpoint. + + Subclasses could override this method to load extra meta information + from ``checkpoint`` and ``cfg`` to model. 
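The `*_kwargs` sets declared on `BaseMMPoseInferencer` above decide how call-time keyword arguments are routed to `preprocess`, `visualize` and `postprocess`; the actual dispatch happens in mmengine's `BaseInferencer._dispatch_kwargs`. A rough, self-contained sketch of that routing under the sets shown above:

```python
# Rough sketch of keyword routing; the key sets mirror the class attributes
# above, the real dispatch lives in mmengine's BaseInferencer.
preprocess_keys = {'bbox_thr', 'nms_thr'}
visualize_keys = {'return_vis', 'show', 'wait_time', 'radius',
                  'thickness', 'kpt_thr', 'vis_out_dir'}
postprocess_keys = {'pred_out_dir'}

call_kwargs = dict(bbox_thr=0.5, kpt_thr=0.3, pred_out_dir='outputs')
preprocess_kwargs = {k: v for k, v in call_kwargs.items() if k in preprocess_keys}
visualize_kwargs = {k: v for k, v in call_kwargs.items() if k in visualize_keys}
postprocess_kwargs = {k: v for k, v in call_kwargs.items() if k in postprocess_keys}
# preprocess_kwargs  -> {'bbox_thr': 0.5}
# visualize_kwargs   -> {'kpt_thr': 0.3}
# postprocess_kwargs -> {'pred_out_dir': 'outputs'}
```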
+ + Args: + model (nn.Module): Model to load weights and meta information. + checkpoint (dict, optional): The loaded checkpoint. + cfg (Config or ConfigDict, optional): The loaded config. + """ + if checkpoint is not None: + _load_checkpoint_to_model(model, checkpoint) + checkpoint_meta = checkpoint.get('meta', {}) + # save the dataset_meta in the model for convenience + if 'dataset_meta' in checkpoint_meta: + # mmpose 1.x + model.dataset_meta = checkpoint_meta['dataset_meta'] + else: + warnings.warn( + 'dataset_meta are not saved in the checkpoint\'s ' + 'meta data, load via config.') + model.dataset_meta = dataset_meta_from_config( + cfg, dataset_mode='train') + else: + warnings.warn('Checkpoint is not loaded, and the inference ' + 'result is calculated by the randomly initialized ' + 'model!') + model.dataset_meta = dataset_meta_from_config( + cfg, dataset_mode='train') + + def _inputs_to_list(self, inputs: InputsType) -> list: + """Preprocess the inputs to a list. + + Preprocess inputs to a list according to its type: + + - list or tuple: return inputs + - str: + - Directory path: return all files in the directory + - other cases: return a list containing the string. The string + could be a path to file, a url or other types of string + according to the task. + + Args: + inputs (InputsType): Inputs for the inferencer. + + Returns: + list: List of input for the :meth:`preprocess`. + """ + self._video_input = False + + if isinstance(inputs, str): + backend = get_file_backend(inputs) + if hasattr(backend, 'isdir') and isdir(inputs): + # Backends like HttpsBackend do not implement `isdir`, so only + # those backends that implement `isdir` could accept the + # inputs as a directory + filepath_list = [ + join_path(inputs, fname) + for fname in list_dir_or_file(inputs, list_dir=False) + ] + inputs = [] + for filepath in filepath_list: + input_type = mimetypes.guess_type(filepath)[0].split( + '/')[0] + if input_type == 'image': + inputs.append(filepath) + inputs = inputs + else: + # if inputs is a path to a video file, it will be converted + # to a list containing separated frame filenames + input_type = mimetypes.guess_type(inputs)[0].split('/')[0] + if input_type == 'video': + self._video_input = True + # split video frames into a temporary folder + frame_folder = tempfile.TemporaryDirectory() + video = mmcv.VideoReader(inputs) + self.video_info = dict( + fps=video.fps, + name=os.path.basename(inputs), + frame_folder=frame_folder) + video.cvt2frames(frame_folder.name, show_progress=False) + frames = sorted(list_dir_or_file(frame_folder.name)) + inputs = [join_path(frame_folder.name, f) for f in frames] + + if not isinstance(inputs, (list, tuple)): + inputs = [inputs] + + return list(inputs) + + def _get_webcam_inputs(self, inputs: str) -> Generator: + """Sets up and returns a generator function that reads frames from a + webcam input. The generator function returns a new frame each time it + is iterated over. + + Args: + inputs (str): A string describing the webcam input, in the format + "webcam:id". + + Returns: + A generator function that yields frames from the webcam input. + + Raises: + ValueError: If the inputs string is not in the expected format. + """ + + # Ensure the inputs string is in the expected format. + inputs = inputs.lower() + assert inputs.startswith('webcam'), f'Expected input to start with ' \ + f'"webcam", but got "{inputs}"' + + # Parse the camera ID from the inputs string. 
+ inputs_ = inputs.split(':') + if len(inputs_) == 1: + camera_id = 0 + elif len(inputs_) == 2 and str.isdigit(inputs_[1]): + camera_id = int(inputs_[1]) + else: + raise ValueError( + f'Expected webcam input to have format "webcam:id", ' + f'but got "{inputs}"') + + # Attempt to open the video capture object. + vcap = cv2.VideoCapture(camera_id) + if not vcap.isOpened(): + warnings.warn(f'Cannot open camera (ID={camera_id})') + return [] + + # Set video input flag and metadata. + self._video_input = True + self.video_info = dict(fps=10, name='webcam.mp4', frame_folder=None) + + # Set up webcam reader generator function. + self._window_closing = False + + def _webcam_reader() -> Generator: + while True: + if self._window_closing: + vcap.release() + break + + ret_val, frame = vcap.read() + if not ret_val: + break + + yield frame + + return _webcam_reader() + + def _visualization_window_on_close(self, event): + self._window_closing = True + + def _init_pipeline(self, cfg: ConfigType) -> Callable: + """Initialize the test pipeline. + + Args: + cfg (ConfigType): model config path or dict + + Returns: + A pipeline to handle various input data, such as ``str``, + ``np.ndarray``. The returned pipeline will be used to process + a single data. + """ + return Compose(cfg.test_dataloader.dataset.pipeline) + + def preprocess(self, inputs: InputsType, batch_size: int = 1, **kwargs): + """Process the inputs into a model-feedable format. + + Args: + inputs (InputsType): Inputs given by user. + batch_size (int): batch size. Defaults to 1. + + Yields: + Any: Data processed by the ``pipeline`` and ``collate_fn``. + List[str or np.ndarray]: List of original inputs in the batch + """ + + for i, input in enumerate(inputs): + data_infos = self.preprocess_single(input, index=i, **kwargs) + # only supports inference with batch size 1 + yield self.collate_fn(data_infos), [input] + + def visualize(self, + inputs: list, + preds: List[PoseDataSample], + return_vis: bool = False, + show: bool = False, + wait_time: float = 0, + radius: int = 3, + thickness: int = 1, + kpt_thr: float = 0.3, + vis_out_dir: str = '', + window_name: str = '', + window_close_event_handler: Optional[Callable] = None + ) -> List[np.ndarray]: + """Visualize predictions. + + Args: + inputs (list): Inputs preprocessed by :meth:`_inputs_to_list`. + preds (Any): Predictions of the model. + return_vis (bool): Whether to return images with predicted results. + show (bool): Whether to display the image in a popup window. + Defaults to False. + wait_time (float): The interval of show (ms). Defaults to 0 + radius (int): Keypoint radius for visualization. Defaults to 3 + thickness (int): Link thickness for visualization. Defaults to 1 + kpt_thr (float): The threshold to visualize the keypoints. + Defaults to 0.3 + vis_out_dir (str, optional): Directory to save visualization + results w/o predictions. If left as empty, no file will + be saved. Defaults to ''. + window_name (str, optional): Title of display window. + window_close_event_handler (callable, optional): + + Returns: + List[np.ndarray]: Visualization results. 
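The webcam reader above turns a `'webcam:id'` string into a frame generator that runs until the display window is closed, and the demo script in this patch already consumes the inferencer as an iterator. Under those assumptions, a typical way to drive webcam input from user code would look like the sketch below (the `'human'` alias and camera id 0 are example choices):

```python
# Illustrative usage only: stream frames from the default webcam. Results are
# yielded one frame at a time; close the display window to stop.
from mmpose.apis.inferencers import MMPoseInferencer

inferencer = MMPoseInferencer(pose2d='human')
for result in inferencer('webcam:0', show=True):
    # `result` holds the predictions (and optional visualization) of one frame
    pass
```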
+ """ + if (not return_vis) and (not show) and (not vis_out_dir): + return + + if getattr(self, 'visualizer', None) is None: + raise ValueError('Visualization needs the "visualizer" term' + 'defined in the config, but got None.') + + self.visualizer.radius = radius + self.visualizer.line_width = thickness + + results = [] + + for single_input, pred in zip(inputs, preds): + if isinstance(single_input, str): + img = mmcv.imread(single_input, channel_order='rgb') + elif isinstance(single_input, np.ndarray): + img = mmcv.bgr2rgb(single_input.copy()) + else: + raise ValueError('Unsupported input type: ' + f'{type(single_input)}') + + img_name = os.path.basename(pred.metainfo['img_path']) + + if vis_out_dir: + if self._video_input: + out_file = join_path(vis_out_dir, 'vis_frames', img_name) + else: + out_file = join_path(vis_out_dir, img_name) + else: + out_file = None + + # since visualization and inference utilize the same process, + # the wait time is reduced when a video input is utilized, + # thereby eliminating the issue of inference getting stuck. + wait_time = 1e-5 if self._video_input else wait_time + + window_name = window_name if window_name else img_name + + visualization = self.visualizer.add_datasample( + window_name, + img, + pred, + draw_gt=False, + show=show, + wait_time=wait_time, + out_file=out_file, + kpt_score_thr=kpt_thr) + results.append(visualization) + + if show and not hasattr(self, '_window_close_cid'): + if window_close_event_handler is None: + window_close_event_handler = \ + self._visualization_window_on_close + self._window_close_cid = \ + self.visualizer.manager.canvas.mpl_connect( + 'close_event', + window_close_event_handler + ) + + if return_vis: + return results + else: + return [] + + def postprocess( + self, + preds: List[PoseDataSample], + visualization: List[np.ndarray], + return_datasample=False, + pred_out_dir: str = '', + ) -> dict: + """Process the predictions and visualization results from ``forward`` + and ``visualize``. + + This method should be responsible for the following tasks: + + 1. Convert datasamples into a json-serializable dict if needed. + 2. Pack the predictions and visualization results and return them. + 3. Dump or log the predictions. + + Args: + preds (List[Dict]): Predictions of the model. + visualization (np.ndarray): Visualized predictions. + return_datasample (bool): Whether to return results as + datasamples. Defaults to False. + pred_out_dir (str): Directory to save the inference results w/o + visualization. If left as empty, no file will be saved. + Defaults to ''. + + Returns: + dict: Inference and visualization results with key ``predictions`` + and ``visualization`` + + - ``visualization (Any)``: Returned by :meth:`visualize` + - ``predictions`` (dict or DataSample): Returned by + :meth:`forward` and processed in :meth:`postprocess`. + If ``return_datasample=False``, it usually should be a + json-serializable dict containing only basic data elements such + as strings and numbers. 
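In practice the keys described here come back in the shape exercised by the unit tests later in this patch: `predictions` is a list with one entry per input image, and each entry is a list of per-instance dicts. A placeholder example for one COCO image with a single detected person (17 keypoints; all values are made up):

```python
# Illustrative shape of the returned dict documented above.
result = {
    'visualization': [],      # np.ndarray images, filled when return_vis=True
    'predictions': [          # one list per input image
        [                     # one dict per detected instance
            {'keypoints': [[320.0, 240.0]] * 17},
            # further per-instance fields are whatever `split_instances`
            # emits for the pose data sample (scores, bboxes, ...)
        ],
    ],
}
assert len(result['predictions'][0][0]['keypoints']) == 17
```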
+ """ + + result_dict = defaultdict(list) + + result_dict['visualization'] = visualization + for pred in preds: + if not return_datasample: + # convert datasamples to list of instance predictions + pred = split_instances(pred.pred_instances) + result_dict['predictions'].append(pred) + + if pred_out_dir != '': + if self._video_input: + pred_out_dir = join_path(pred_out_dir, 'pred_frames') + + for pred, data_sample in zip(result_dict['predictions'], preds): + fname = os.path.splitext( + os.path.basename( + data_sample.metainfo['img_path']))[0] + '.json' + mmengine.dump( + pred, join_path(pred_out_dir, fname), indent=' ') + + return result_dict + + def _merge_outputs(self, vis_out_dir: str, pred_out_dir: str, + **kwargs: Dict[str, Any]) -> None: + """Merge the visualized frames and predicted instance outputs and save + them. + + Args: + vis_out_dir (str): Path to the directory where the visualized + frames are saved. + pred_out_dir (str): Path to the directory where the predicted + instance outputs are saved. + **kwargs: Other arguments that are not used in this method. + """ + assert self._video_input + + if vis_out_dir != '': + vis_frame_out_dir = join_path(vis_out_dir, 'vis_frames') + if not isdir(vis_frame_out_dir) or len( + os.listdir(vis_frame_out_dir)) == 0: + warnings.warn( + f'{vis_frame_out_dir} does not exist or is empty.') + else: + mmcv.frames2video( + vis_frame_out_dir, + join_path(vis_out_dir, self.video_info['name']), + fps=self.video_info['fps'], + fourcc='mp4v', + show_progress=False) + shutil.rmtree(vis_frame_out_dir) + + if pred_out_dir != '': + pred_frame_out_dir = join_path(pred_out_dir, 'pred_frames') + if not isdir(pred_frame_out_dir) or len( + os.listdir(pred_frame_out_dir)) == 0: + warnings.warn( + f'{pred_frame_out_dir} does not exist or is empty.') + else: + predictions = [] + pred_files = list_dir_or_file(pred_frame_out_dir) + for frame_id, pred_file in enumerate(sorted(pred_files)): + predictions.append({ + 'frame_id': + frame_id, + 'instances': + mmengine.load( + join_path(pred_frame_out_dir, pred_file)) + }) + fname = os.path.splitext( + os.path.basename(self.video_info['name']))[0] + '.json' + mmengine.dump( + predictions, join_path(pred_out_dir, fname), indent=' ') + shutil.rmtree(pred_frame_out_dir) diff --git a/mmpose/apis/inferencers/mmpose_inferencer.py b/mmpose/apis/inferencers/mmpose_inferencer.py index 541a2e8f19..eb91b85508 100644 --- a/mmpose/apis/inferencers/mmpose_inferencer.py +++ b/mmpose/apis/inferencers/mmpose_inferencer.py @@ -1,19 +1,16 @@ # Copyright (c) OpenMMLab. All rights reserved. -import mimetypes -import os -import tempfile -from collections import defaultdict +import warnings from typing import Dict, List, Optional, Sequence, Union -import mmcv import numpy as np from mmengine.config import Config, ConfigDict -from mmengine.fileio import (get_file_backend, isdir, join_path, - list_dir_or_file) -from mmengine.infer.infer import BaseInferencer, ModelType +from mmengine.fileio import join_path +from mmengine.infer.infer import ModelType from mmengine.structures import InstanceData +from rich.progress import track from mmpose.structures import PoseDataSample +from .base_mmpose_inferencer import BaseMMPoseInferencer from .pose2d_inferencer import Pose2DInferencer InstanceList = List[InstanceData] @@ -25,7 +22,7 @@ ResType = Union[Dict, List[Dict], InstanceData, List[InstanceData]] -class MMPoseInferencer(BaseInferencer): +class MMPoseInferencer(BaseMMPoseInferencer): """MMPose Inferencer. 
It's a unified inferencer interface for pose estimation task, currently including: Pose2D. and it can be used to perform 2D keypoint detection. @@ -41,16 +38,16 @@ class MMPoseInferencer(BaseInferencer): Defaults to ``None``. pose2d_weights (str, optional): Path to the custom checkpoint file of - the selected rec model. If it is not specified and "pose2d" is a - model name of metafile, the weights will be loaded from metafile. - Defaults to None. + the selected pose2d model. If it is not specified and "pose2d" is + a model name of metafile, the weights will be loaded from + metafile. Defaults to None. device (str, optional): Device to run inference. If None, the available device will be automatically used. Defaults to None. scope (str, optional): The scope of the model. Defaults to "mmpose". instance_type (str, optional): The name of the instances, such as - "human", "hand", "animal", and etc. This argument works as the - alias for the detection models, which will be utilized in - topdown methods. Defaults to None. + "human", "hand", "animal", and etc. This argument serve as + aliases for the detection models to be used in top-down + pose2d methods. Defaults to None. det_model(str, optional): Path to the config of detection model. Defaults to None. det_weights(str, optional): Path to the checkpoints of detection @@ -93,29 +90,42 @@ def __init__(self, det_weights, det_cat_ids) self.mode = 'pose2d' - def _init_pipeline(self, cfg: ConfigType) -> None: - pass + def preprocess(self, inputs: InputsType, batch_size: int = 1, **kwargs): + """Process the inputs into a model-feedable format. + + Args: + inputs (InputsType): Inputs given by user. + batch_size (int): batch size. Defaults to 1. + + Yields: + Any: Data processed by the ``pipeline`` and ``collate_fn``. + List[str or np.ndarray]: List of original inputs in the batch + """ - def forward(self, inputs: InputType, batch_size: int, - **forward_kwargs) -> PredType: + for i, input in enumerate(inputs): + data_batch = {} + if 'pose2d' in self.mode: + data_infos = self.pose2d_inferencer.preprocess_single( + input, index=i, **kwargs) + data_batch['pose2d'] = self.pose2d_inferencer.collate_fn( + data_infos) + # only supports inference with batch size 1 + yield data_batch, [input] + + def forward(self, inputs: InputType, **forward_kwargs) -> PredType: """Forward the inputs to the model. Args: inputs (InputsType): The inputs to be forwarded. - batch_size (int): Batch size. Defaults to 1. Returns: Dict: The prediction results. Possibly with keys "pose2d". """ result = {} if self.mode == 'pose2d': - predictions = self.pose2d_inferencer( - inputs, - return_datasample=True, - return_vis=False, - batch_size=batch_size, - **forward_kwargs)['predictions'] - result['pose2d'] = predictions + data_samples = self.pose2d_inferencer.forward( + inputs['pose2d'], **forward_kwargs) + result['pose2d'] = data_samples return result @@ -158,73 +168,37 @@ def __call__( postprocess_kwargs, ) = self._dispatch_kwargs(**kwargs) - ori_inputs = self._inputs_to_list(inputs) - - preds = self.forward(ori_inputs, batch_size, **preprocess_kwargs, - **forward_kwargs) - - visualization = self.visualize(ori_inputs, preds, **visualize_kwargs) - results = self.postprocess(preds, visualization, return_datasample, - **postprocess_kwargs) - return results - - def _inputs_to_list(self, inputs: InputsType) -> list: - """Preprocess the inputs to a list. 
- - Preprocess inputs to a list according to its type: - - - list or tuple: return inputs - - str: - - Directory path: return all files in the directory - - other cases: return a list containing the string. The string - could be a path to file, a url or other types of string - according to the task. - - Args: - inputs (InputsType): Inputs for the inferencer. - - Returns: - list: List of input for the :meth:`preprocess`. - """ - if isinstance(inputs, str): - backend = get_file_backend(inputs) - if hasattr(backend, 'isdir') and isdir(inputs): - # Backends like HttpsBackend do not implement `isdir`, so only - # those backends that implement `isdir` could accept the - # inputs as a directory - filepath_list = [ - join_path(inputs, fname) - for fname in list_dir_or_file(inputs, list_dir=False) - ] - inputs = [] - for filepath in filepath_list: - input_type = mimetypes.guess_type(filepath)[0].split( - '/')[0] - if input_type == 'image': - inputs.append(filepath) - inputs = inputs - else: - # if inputs is a path to a video file, it will be converted - # to a list containing separated frame filenames - input_type = mimetypes.guess_type(inputs)[0].split('/')[0] - if input_type == 'video': - if hasattr(self, 'pose2d_inferencer'): - self.pose2d_inferencer.video_input = True - # split video frames into a temperory folder - tmp_folder = tempfile.TemporaryDirectory() - video = mmcv.VideoReader(inputs) - self.pose2d_inferencer.video_info = dict( - fps=video.fps, - name=os.path.basename(inputs), - tmp_folder=tmp_folder) - video.cvt2frames(tmp_folder.name, show_progress=False) - frames = sorted(os.listdir(tmp_folder.name)) - inputs = [os.path.join(tmp_folder.name, f) for f in frames] - - if not isinstance(inputs, (list, tuple)): - inputs = [inputs] - - return list(inputs) + # preprocessing + if isinstance(inputs, str) and inputs.startswith('webcam'): + inputs = self._get_webcam_inputs(inputs) + batch_size = 1 + if not visualize_kwargs.get('show', False): + warnings.warn('The display mode is closed when using webcam ' + 'input. 
It will be turned on automatically.') + visualize_kwargs['show'] = True + else: + inputs = self._inputs_to_list(inputs) + + inputs = self.preprocess( + inputs, batch_size=batch_size, **preprocess_kwargs) + + preds = [] + if 'pose2d' not in self.mode or not hasattr(self.pose2d_inferencer, + 'detector'): + inputs = track(inputs, description='Inference') + + for proc_inputs, ori_inputs in inputs: + preds = self.forward(proc_inputs, **forward_kwargs) + + visualization = self.visualize(ori_inputs, preds, + **visualize_kwargs) + results = self.postprocess(preds, visualization, return_datasample, + **postprocess_kwargs) + yield results + + # merge visualization and prediction results + if self._video_input: + self._merge_outputs(**visualize_kwargs, **postprocess_kwargs) def visualize(self, inputs: InputsType, preds: PredType, **kwargs) -> List[np.ndarray]: @@ -250,33 +224,58 @@ def visualize(self, inputs: InputsType, preds: PredType, """ if 'pose2d' in self.mode: - return self.pose2d_inferencer.visualize(inputs, preds['pose2d'], - **kwargs) + window_name = '' + if self._video_input: + window_name = self.video_info['name'] + if kwargs.get('vis_out_dir', ''): + kwargs['vis_out_dir'] = join_path(kwargs['vis_out_dir'], + 'vis_frames') + if kwargs.get('show', False): + kwargs['wait_time'] = 1e-5 + return self.pose2d_inferencer.visualize( + inputs, + preds['pose2d'], + window_name=window_name, + window_close_event_handler=self._visualization_window_on_close, + **kwargs) + + def postprocess( + self, + preds: List[PoseDataSample], + visualization: List[np.ndarray], + return_datasample=False, + pred_out_dir: str = '', + ) -> dict: + """Process the predictions and visualization results from ``forward`` + and ``visualize``. - def postprocess(self, - preds: List[PoseDataSample], - visualization: List[np.ndarray], - return_datasample=False, - pred_out_dir: str = '', - **kwargs) -> dict: - """Simply apply postprocess of pose2d_inferencer. + This method should be responsible for the following tasks: + + 1. Convert datasamples into a json-serializable dict if needed. + 2. Pack the predictions and visualization results and return them. + 3. Dump or log the predictions. Args: preds (List[Dict]): Predictions of the model. visualization (np.ndarray): Visualized predictions. - return_datasample (bool): Whether to return results as datasamples. - Defaults to False. + return_datasample (bool): Whether to return results as + datasamples. Defaults to False. pred_out_dir (str): Directory to save the inference results w/o visualization. If left as empty, no file will be saved. Defaults to ''. - """ - result_dict = defaultdict(list) - result_dict['visualization'] = visualization + Returns: + dict: Inference and visualization results with key ``predictions`` + and ``visualization`` + + - ``visualization (Any)``: Returned by :meth:`visualize` + - ``predictions`` (dict or DataSample): Returned by + :meth:`forward` and processed in :meth:`postprocess`. + If ``return_datasample=False``, it usually should be a + json-serializable dict containing only basic data elements such + as strings and numbers. 
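Because `__call__` now yields one result per batch, results over a folder or video are accumulated by the caller. The unit tests in this patch do exactly that, which also works as a usage sketch (the input path is a placeholder):

```python
# Accumulate generator output across a directory of images, as the tests do.
from collections import defaultdict

from mmpose.apis.inferencers import MMPoseInferencer

inferencer = MMPoseInferencer(pose2d='human')

results = defaultdict(list)
for res in inferencer('path/to/images', out_dir='outputs'):
    for key in res:
        results[key].extend(res[key])
# results['predictions'] now holds one entry per processed image;
# visualizations land in outputs/visualizations, JSON in outputs/predictions.
```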
+ """ if 'pose2d' in self.mode: - result_dict['predictions'] = self.pose2d_inferencer.postprocess( - preds['pose2d'], visualization, return_datasample, - pred_out_dir, **kwargs)['predictions'] - - return result_dict + return super().postprocess(preds['pose2d'], visualization, + return_datasample, pred_out_dir) diff --git a/mmpose/apis/inferencers/pose2d_inferencer.py b/mmpose/apis/inferencers/pose2d_inferencer.py index 34f6f46602..869764769b 100644 --- a/mmpose/apis/inferencers/pose2d_inferencer.py +++ b/mmpose/apis/inferencers/pose2d_inferencer.py @@ -1,31 +1,20 @@ # Copyright (c) OpenMMLab. All rights reserved. -import mimetypes -import os -import tempfile import warnings -from collections import defaultdict -from typing import Callable, Dict, List, Optional, Sequence, Union +from typing import Dict, List, Optional, Sequence, Tuple, Union import mmcv -import mmengine import numpy as np -import torch.nn as nn from mmdet.apis.det_inferencer import DetInferencer from mmengine.config import Config, ConfigDict -from mmengine.dataset import Compose -from mmengine.fileio import (get_file_backend, isdir, join_path, - list_dir_or_file) -from mmengine.infer.infer import BaseInferencer, ModelType +from mmengine.infer.infer import ModelType from mmengine.registry import init_default_scope -from mmengine.runner.checkpoint import _load_checkpoint_to_model from mmengine.structures import InstanceData from rich.progress import track -from mmpose.apis.inference import dataset_meta_from_config from mmpose.evaluation.functional import nms from mmpose.registry import DATASETS, INFERENCERS -from mmpose.structures import (PoseDataSample, merge_data_samples, - split_instances) +from mmpose.structures import merge_data_samples +from .base_mmpose_inferencer import BaseMMPoseInferencer from .utils import default_det_models InstanceList = List[InstanceData] @@ -39,25 +28,29 @@ @INFERENCERS.register_module(name='pose-estimation') @INFERENCERS.register_module() -class Pose2DInferencer(BaseInferencer): +class Pose2DInferencer(BaseMMPoseInferencer): """The inferencer for 2D pose estimation. Args: - model (str, optional): Path to the config file or the model name - defined in metafile. For example, it could be - "td-hm_hrnet-w32_8xb64-210e_coco-256x192" or - "configs/body_2d_keypoint/topdown_heatmap/coco/" \\ - "td-hm_hrnet-w32_8xb64-210e_coco-256x192.py" + model (str, optional): Pretrained 2D pose estimation algorithm. + It's the path to the config file or the model name defined in + metafile. For example, it could be: + + - model alias, e.g. ``'body'``, + - config name, e.g. ``'simcc_res50_8xb64-210e_coco-256x192'``, + - config path + + Defaults to ``None``. weights (str, optional): Path to the checkpoint. If it is not - specified and model is a model name of metafile, the weights + specified and "model" is a model name of metafile, the weights will be loaded from metafile. Defaults to None. device (str, optional): Device to run inference. If None, the available device will be automatically used. Defaults to None. scope (str, optional): The scope of the model. Defaults to "mmpose". instance_type (str, optional): The name of the instances, such as - "human", "hand", "animal", and etc. This argument works as the - alias for the detection models, which will be utilized in - topdown methods. Defaults to None. + "human", "hand", "animal", and etc. This argument serve as + aliases for the detection models to be used in top-down + approaches. Defaults to None. det_model(str, optional): Path to the config of detection model. 
Defaults to None. det_weights(str, optional): Path to the checkpoints of detection @@ -87,7 +80,7 @@ def __init__(self, instance_type: Optional[str] = None, det_model: Optional[Union[ModelType, str]] = None, det_weights: Optional[str] = None, - det_cat_ids: Optional[Union[int, List]] = None) -> None: + det_cat_ids: Optional[Union[int, Tuple]] = None) -> None: init_default_scope(scope) super().__init__( @@ -114,174 +107,77 @@ def __init__(self, self.detector = DetInferencer( det_model, det_weights, device=device) - self.det_cat_ids = det_cat_ids - - self.video_input = False - - def _load_weights_to_model(self, model: nn.Module, - checkpoint: Optional[dict], - cfg: Optional[ConfigType]) -> None: - """Loading model weights and meta information from cfg and checkpoint. - - Subclasses could override this method to load extra meta information - from ``checkpoint`` and ``cfg`` to model. - - Args: - model (nn.Module): Model to load weights and meta information. - checkpoint (dict, optional): The loaded checkpoint. - cfg (Config or ConfigDict, optional): The loaded config. - """ - if checkpoint is not None: - _load_checkpoint_to_model(model, checkpoint) - checkpoint_meta = checkpoint.get('meta', {}) - # save the dataset_meta in the model for convenience - if 'dataset_meta' in checkpoint_meta: - # mmpose 1.x - model.dataset_meta = checkpoint_meta['dataset_meta'] + if isinstance(det_cat_ids, (tuple, list)): + self.det_cat_ids = det_cat_ids else: - warnings.warn( - 'dataset_meta are not saved in the checkpoint\'s ' - 'meta data, load via config.') - model.dataset_meta = dataset_meta_from_config( - cfg, dataset_mode='train') - else: - warnings.warn('Checkpoint is not loaded, and the inference ' - 'result is calculated by the randomly initialized ' - 'model!') - model.dataset_meta = dataset_meta_from_config( - cfg, dataset_mode='train') - - def _inputs_to_list(self, inputs: InputsType) -> list: - """Preprocess the inputs to a list. + self.det_cat_ids = (det_cat_ids, ) - Preprocess inputs to a list according to its type: - - - list or tuple: return inputs - - str: - - Directory path: return all files in the directory - - other cases: return a list containing the string. The string - could be a path to file, a url or other types of string - according to the task. - - Args: - inputs (InputsType): Inputs for the inferencer. + self._video_input = False - Returns: - list: List of input for the :meth:`preprocess`. 
- """ - if isinstance(inputs, str): - backend = get_file_backend(inputs) - if hasattr(backend, 'isdir') and isdir(inputs): - # Backends like HttpsBackend do not implement `isdir`, so only - # those backends that implement `isdir` could accept the - # inputs as a directory - filepath_list = [ - join_path(inputs, fname) - for fname in list_dir_or_file(inputs, list_dir=False) - ] - inputs = [] - for filepath in filepath_list: - input_type = mimetypes.guess_type(filepath)[0].split( - '/')[0] - if input_type == 'image': - inputs.append(filepath) - inputs = inputs - else: - # if inputs is a path to a video file, it will be converted - # to a list containing separated frame filenames - input_type = mimetypes.guess_type(inputs)[0].split('/')[0] - if input_type == 'video': - self.video_input = True - # split video frames into a temperory folder - tmp_folder = tempfile.TemporaryDirectory() - video = mmcv.VideoReader(inputs) - self.video_info = dict( - fps=video.fps, - name=os.path.basename(inputs), - tmp_folder=tmp_folder) - video.cvt2frames(tmp_folder.name, show_progress=False) - frames = sorted(os.listdir(tmp_folder.name)) - inputs = [os.path.join(tmp_folder.name, f) for f in frames] - - if not isinstance(inputs, (list, tuple)): - inputs = [inputs] - - return list(inputs) - - def _init_pipeline(self, cfg: ConfigType) -> Callable: - """Initialize the test pipeline. + def preprocess_single(self, + input: InputType, + index: int, + bbox_thr: float = 0.3, + nms_thr: float = 0.3): + """Process a single input into a model-feedable format. Args: - cfg (ConfigType): model config path or dict - - Returns: - A pipeline to handle various input data, such as ``str``, - ``np.ndarray``. The returned pipeline will be used to process - a single data. - """ - return Compose(cfg.test_dataloader.dataset.pipeline) - - def preprocess(self, - inputs: InputsType, - batch_size: int = 1, - bbox_thr: float = 0.3, - nms_thr: float = 0.3): - """Process the inputs into a model-feedable format. - - 1. for topdown models, detection is conducted in this function - 2. inputs are converted into the format same as the data_info - provided by dataset and then fed to pipeline - 3. batch_size is ineffective here + input (InputType): Input given by user. + index (int): index of the input + bbox_thr (float): threshold for bounding box detection. + Defaults to 0.3. + nms_thr (float): IoU threshold for bounding box NMS. + Defaults to 0.3. + + Yields: + Any: Data processed by the ``pipeline`` and ``collate_fn``. 
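The body that follows keeps only detections whose label is in `det_cat_ids` and whose score exceeds `bbox_thr`, then applies NMS before building per-instance data infos. Distilled into a self-contained NumPy sketch (using `np.isin` in place of the explicit per-category loop; the boxes and labels are made up):

```python
import numpy as np

# Sketch of the detection filtering performed in preprocess_single.
bboxes = np.array([[ 10.,  20., 200., 400., 0.95],   # person, confident
                   [ 15.,  25., 190., 380., 0.20],   # person, low score
                   [300., 100., 420., 260., 0.90]])  # another category
labels = np.array([0, 0, 56])
det_cat_ids, bbox_thr = (0, ), 0.3

keep = np.isin(labels, det_cat_ids) & (bboxes[:, 4] > bbox_thr)
bboxes = bboxes[keep]   # only the confident person box survives
# the real code additionally runs mmpose's `nms` on the remaining boxes
```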
""" - for i, input in enumerate(inputs): + if isinstance(input, str): + data_info = dict(img_path=input) + else: + data_info = dict(img=input, img_path=f'{index}.jpg'.rjust(10, '0')) + data_info.update(self.model.dataset_meta) - if isinstance(input, str): - data_info = dict(img_path=input) - else: - data_info = dict(img=input, img_path=f'{i+1}.jpg'.rjust(10)) - data_info.update(self.model.dataset_meta) - - if self.cfg.data_mode == 'topdown': - det_results = self.detector( - input, return_datasample=True)['predictions'] - pred_instance = det_results[0].pred_instances.cpu().numpy() - bboxes = np.concatenate( - (pred_instance.bboxes, pred_instance.scores[:, None]), - axis=1) - - label_mask = np.zeros(len(bboxes), dtype=np.uint8) - for cat_id in self.det_cat_ids: - label_mask = np.logical_or(label_mask, - pred_instance.labels == cat_id) - - bboxes = bboxes[np.logical_and( - label_mask, pred_instance.scores > bbox_thr)] - bboxes = bboxes[nms(bboxes, nms_thr)] - - data_infos = [] - if len(bboxes) > 0: - for bbox in bboxes: - inst = data_info.copy() - inst['bbox'] = bbox[None, :4] - inst['bbox_score'] = bbox[4:5] - data_infos.append(self.pipeline(inst)) - else: + if self.cfg.data_mode == 'topdown': + det_results = self.detector( + input, return_datasample=True)['predictions'] + pred_instance = det_results[0].pred_instances.cpu().numpy() + bboxes = np.concatenate( + (pred_instance.bboxes, pred_instance.scores[:, None]), axis=1) + + label_mask = np.zeros(len(bboxes), dtype=np.uint8) + for cat_id in self.det_cat_ids: + label_mask = np.logical_or(label_mask, + pred_instance.labels == cat_id) + + bboxes = bboxes[np.logical_and(label_mask, + pred_instance.scores > bbox_thr)] + bboxes = bboxes[nms(bboxes, nms_thr)] + + data_infos = [] + if len(bboxes) > 0: + for bbox in bboxes: inst = data_info.copy() + inst['bbox'] = bbox[None, :4] + inst['bbox_score'] = bbox[4:5] + data_infos.append(self.pipeline(inst)) + else: + inst = data_info.copy() - # get bbox from the image size - if isinstance(input, str): - input = mmcv.imread(input) - h, w = input.shape[:2] + # get bbox from the image size + if isinstance(input, str): + input = mmcv.imread(input) + h, w = input.shape[:2] - inst['bbox'] = np.array([[0, 0, w, h]], dtype=np.float32) - inst['bbox_score'] = np.ones(1, dtype=np.float32) - data_infos.append(self.pipeline(inst)) + inst['bbox'] = np.array([[0, 0, w, h]], dtype=np.float32) + inst['bbox_score'] = np.ones(1, dtype=np.float32) + data_infos.append(self.pipeline(inst)) - else: # bottom-up - data_infos = [self.pipeline(data_info)] + else: # bottom-up + data_infos = [self.pipeline(data_info)] - yield self.collate_fn(data_infos) + return data_infos def forward(self, inputs: Union[dict, tuple]): data_samples = super().forward(inputs) @@ -329,164 +225,33 @@ def __call__( postprocess_kwargs, ) = self._dispatch_kwargs(**kwargs) - ori_inputs = self._inputs_to_list(inputs) + # preprocessing + if isinstance(inputs, str) and inputs.startswith('webcam'): + inputs = self._get_webcam_inputs(inputs) + batch_size = 1 + if not visualize_kwargs.get('show', False): + warnings.warn('The display mode is closed when using webcam ' + 'input. 
It will be turned on automatically.') + visualize_kwargs['show'] = True + else: + inputs = self._inputs_to_list(inputs) + inputs = self.preprocess( - ori_inputs, batch_size=batch_size, **preprocess_kwargs) + inputs, batch_size=batch_size, **preprocess_kwargs) + preds = [] if not hasattr(self, 'detector'): inputs = track(inputs, description='Inference') - for data in inputs: - preds.extend(self.forward(data, **forward_kwargs)) - visualization = self.visualize(ori_inputs, preds, **visualize_kwargs) - results = self.postprocess(preds, visualization, return_datasample, - **postprocess_kwargs) - return results - - def visualize(self, - inputs: list, - preds: List[PoseDataSample], - return_vis: bool = False, - show: bool = False, - wait_time: int = 0, - radius: int = 3, - thickness: int = 1, - kpt_thr: float = 0.3, - vis_out_dir: str = '') -> List[np.ndarray]: - """Visualize predictions. - Args: - inputs (list): Inputs preprocessed by :meth:`_inputs_to_list`. - preds (Any): Predictions of the model. - return_vis (bool): Whether to return images with predicted results. - show (bool): Whether to display the image in a popup window. - Defaults to False. - show_interval (int): The interval of show (s). Defaults to 0 - radius (int): Keypoint radius for visualization. Defaults to 3 - thickness (int): Link thickness for visualization. Defaults to 1 - kpt_thr (float): The threshold to visualize the keypoints. - Defaults to 0.3 - vis_out_dir (str, optional): directory to save visualization - results w/o predictions. If left as empty, no file will - be saved. Defaults to ''. - - Returns: - List[np.ndarray]: Visualization results. - """ - if (not return_vis) and (not show) and (vis_out_dir is None): - return + for proc_inputs, ori_inputs in inputs: + preds = self.forward(proc_inputs, **forward_kwargs) - if getattr(self, 'visualizer', None) is None: - raise ValueError('Visualization needs the "visualizer" term' - 'defined in the config, but got None.') + visualization = self.visualize(ori_inputs, preds, + **visualize_kwargs) + results = self.postprocess(preds, visualization, return_datasample, + **postprocess_kwargs) + yield results - self.visualizer.radius = radius - self.visualizer.line_width = thickness - - results = [] - - for single_input, pred in zip(inputs, preds): - if isinstance(single_input, str): - img = mmcv.imread(single_input, channel_order='rgb') - elif isinstance(single_input, np.ndarray): - img = single_input.copy() - else: - raise ValueError('Unsupported input type: ' - f'{type(single_input)}') - - img_name = os.path.basename(pred.metainfo['img_path']) - - if self.video_input: - out_file = os.path.join(self.video_info['tmp_folder'].name, - img_name) - elif vis_out_dir: - out_file = os.path.join(vis_out_dir, img_name) - else: - out_file = None - - visualization = self.visualizer.add_datasample( - img_name, - img, - pred, - draw_gt=False, - show=show, - wait_time=wait_time, - out_file=out_file, - kpt_score_thr=kpt_thr) - results.append(visualization) - - if self.video_input and vis_out_dir: - # merge saved frame images into video - os.makedirs(vis_out_dir, exist_ok=True) - mmcv.frames2video( - self.video_info['tmp_folder'].name, - f'{vis_out_dir}/{self.video_info["name"]}', - fps=self.video_info['fps'], - fourcc='mp4v', - show_progress=False) - - return results - - def postprocess( - self, - preds: List[PoseDataSample], - visualization: List[np.ndarray], - return_datasample=False, - pred_out_dir: str = '', - ) -> dict: - """Process the predictions and visualization results from ``forward`` - 
and ``visualize``. - - This method should be responsible for the following tasks: - - 1. Convert datasamples into a json-serializable dict if needed. - 2. Pack the predictions and visualization results and return them. - 3. Dump or log the predictions. - - Args: - preds (List[Dict]): Predictions of the model. - visualization (np.ndarray): Visualized predictions. - return_datasample (bool): Whether to return results as - datasamples. Defaults to False. - pred_out_dir (str): Directory to save the inference results w/o - visualization. If left as empty, no file will be saved. - Defaults to ''. - - Returns: - dict: Inference and visualization results with key ``predictions`` - and ``visualization`` - - - ``visualization (Any)``: Returned by :meth:`visualize` - - ``predictions`` (dict or DataSample): Returned by - :meth:`forward` and processed in :meth:`postprocess`. - If ``return_datasample=False``, it usually should be a - json-serializable dict containing only basic data elements such - as strings and numbers. - """ - - result_dict = defaultdict(list) - - result_dict['visualization'] = visualization - for pred in preds: - if not return_datasample: - # convert datasamples to list of instance predictions - pred = split_instances(pred.pred_instances) - result_dict['predictions'].append(pred) - - if pred_out_dir != '': - if self.video_input: - result_dict['predictions'] = [{ - 'frame_id': i, - 'instances': pred - } for i, pred in enumerate(result_dict['predictions'])] - fname = os.path.splitext(self.video_info['name'])[0] + '.json' - mmengine.dump(result_dict['predictions'], - join_path(pred_out_dir, fname)) - else: - for pred, data_sample in zip(result_dict['predictions'], - preds): - fname = os.path.splitext( - os.path.basename( - data_sample.metainfo['img_path']))[0] + '.json' - mmengine.dump(pred, join_path(pred_out_dir, fname)) - - return result_dict + # merge visualization and prediction results + if self._video_input: + self._merge_outputs(**visualize_kwargs, **postprocess_kwargs) diff --git a/tests/test_apis/test_inferencers/test_mmpose_inferencer.py b/tests/test_apis/test_inferencers/test_mmpose_inferencer.py index 03c649982c..cb4ebe3426 100644 --- a/tests/test_apis/test_inferencers/test_mmpose_inferencer.py +++ b/tests/test_apis/test_inferencers/test_mmpose_inferencer.py @@ -1,6 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. 
import os import os.path as osp +from collections import defaultdict from tempfile import TemporaryDirectory from unittest import TestCase @@ -22,7 +23,7 @@ def test_call(self): # `inputs` is path to an image inputs = img_path - results1 = inferencer(inputs, return_vis=True) + results1 = next(inferencer(inputs, return_vis=True)) self.assertIn('visualization', results1) self.assertSequenceEqual(results1['visualization'][0].shape, img.shape) self.assertIn('predictions', results1) @@ -31,35 +32,43 @@ def test_call(self): # `inputs` is an image array inputs = img - results2 = inferencer(inputs) + results2 = next(inferencer(inputs)) self.assertEqual( len(results1['predictions'][0]), len(results2['predictions'][0])) self.assertSequenceEqual(results1['predictions'][0][0]['keypoints'], results2['predictions'][0][0]['keypoints']) - results2 = inferencer(inputs, return_datasample=True) + results2 = next(inferencer(inputs, return_datasample=True)) self.assertIsInstance(results2['predictions'][0], PoseDataSample) # `inputs` is path to a directory inputs = osp.dirname(img_path) with TemporaryDirectory() as tmp_dir: # only save visualizations - results3 = inferencer(inputs, vis_out_dir=tmp_dir) + for res in inferencer(inputs, vis_out_dir=tmp_dir): + pass self.assertEqual(len(os.listdir(tmp_dir)), 4) # save both visualizations and predictions - results3 = inferencer(inputs, out_dir=tmp_dir) + results3 = defaultdict(list) + for res in inferencer(inputs, out_dir=tmp_dir): + for key in res: + results3[key].extend(res[key]) self.assertEqual(len(os.listdir(f'{tmp_dir}/visualizations')), 4) self.assertEqual(len(os.listdir(f'{tmp_dir}/predictions')), 4) self.assertEqual(len(results3['predictions']), 4) self.assertSequenceEqual(results1['predictions'][0][0]['keypoints'], - results3['predictions'][3][0]['keypoints']) + results3['predictions'][2][0]['keypoints']) # `inputs` is path to a video inputs = 'tests/data/posetrack18/videos/000001_mpiinew_test/' \ '000001_mpiinew_test.mp4' with TemporaryDirectory() as tmp_dir: - results = inferencer(inputs, out_dir=tmp_dir) + results = defaultdict(list) + for res in inferencer(inputs, out_dir=tmp_dir): + for key in res: + results[key].extend(res[key]) self.assertIn('000001_mpiinew_test.mp4', os.listdir(f'{tmp_dir}/visualizations')) self.assertIn('000001_mpiinew_test.json', os.listdir(f'{tmp_dir}/predictions')) + self.assertTrue(inferencer._video_input) self.assertIn(len(results['predictions']), (4, 5)) diff --git a/tests/test_apis/test_inferencers/test_pose2d_inferencer.py b/tests/test_apis/test_inferencers/test_pose2d_inferencer.py index b72a870a8b..86389ba6df 100644 --- a/tests/test_apis/test_inferencers/test_pose2d_inferencer.py +++ b/tests/test_apis/test_inferencers/test_pose2d_inferencer.py @@ -1,6 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. 
import os import os.path as osp +from collections import defaultdict from tempfile import TemporaryDirectory from unittest import TestCase @@ -71,7 +72,7 @@ def test_call(self): # `inputs` is path to an image inputs = img_path - results1 = inferencer(inputs, return_vis=True) + results1 = next(inferencer(inputs, return_vis=True)) self.assertIn('visualization', results1) self.assertSequenceEqual(results1['visualization'][0].shape, img.shape) self.assertIn('predictions', results1) @@ -80,36 +81,44 @@ def test_call(self): # `inputs` is an image array inputs = img - results2 = inferencer(inputs) + results2 = next(inferencer(inputs)) self.assertEqual( len(results1['predictions'][0]), len(results2['predictions'][0])) self.assertSequenceEqual(results1['predictions'][0][0]['keypoints'], results2['predictions'][0][0]['keypoints']) - results2 = inferencer(inputs, return_datasample=True) + results2 = next(inferencer(inputs, return_datasample=True)) self.assertIsInstance(results2['predictions'][0], PoseDataSample) # `inputs` is path to a directory inputs = osp.dirname(img_path) + with TemporaryDirectory() as tmp_dir: # only save visualizations - results3 = inferencer(inputs, vis_out_dir=tmp_dir) + for res in inferencer(inputs, vis_out_dir=tmp_dir): + pass self.assertEqual(len(os.listdir(tmp_dir)), 4) # save both visualizations and predictions - results3 = inferencer(inputs, out_dir=tmp_dir) + results3 = defaultdict(list) + for res in inferencer(inputs, out_dir=tmp_dir): + for key in res: + results3[key].extend(res[key]) self.assertEqual(len(os.listdir(f'{tmp_dir}/visualizations')), 4) self.assertEqual(len(os.listdir(f'{tmp_dir}/predictions')), 4) self.assertEqual(len(results3['predictions']), 4) self.assertSequenceEqual(results1['predictions'][0][0]['keypoints'], - results3['predictions'][3][0]['keypoints']) + results3['predictions'][2][0]['keypoints']) # `inputs` is path to a video inputs = 'tests/data/posetrack18/videos/000001_mpiinew_test/' \ '000001_mpiinew_test.mp4' with TemporaryDirectory() as tmp_dir: - results = inferencer(inputs, out_dir=tmp_dir) + results = defaultdict(list) + for res in inferencer(inputs, out_dir=tmp_dir): + for key in res: + results[key].extend(res[key]) self.assertIn('000001_mpiinew_test.mp4', os.listdir(f'{tmp_dir}/visualizations')) self.assertIn('000001_mpiinew_test.json', os.listdir(f'{tmp_dir}/predictions')) - self.assertTrue(inferencer.video_input) + self.assertTrue(inferencer._video_input) self.assertIn(len(results['predictions']), (4, 5)) From 084629cc5ff62a11b4ea456ee9fe5cd8f779215a Mon Sep 17 00:00:00 2001 From: lupeng Date: Fri, 17 Feb 2023 23:22:10 +0800 Subject: [PATCH 06/17] fix ut --- tests/test_apis/test_inferencers/test_mmpose_inferencer.py | 2 +- tests/test_apis/test_inferencers/test_pose2d_inferencer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_apis/test_inferencers/test_mmpose_inferencer.py b/tests/test_apis/test_inferencers/test_mmpose_inferencer.py index cb4ebe3426..3df85fc46e 100644 --- a/tests/test_apis/test_inferencers/test_mmpose_inferencer.py +++ b/tests/test_apis/test_inferencers/test_mmpose_inferencer.py @@ -56,7 +56,7 @@ def test_call(self): self.assertEqual(len(os.listdir(f'{tmp_dir}/predictions')), 4) self.assertEqual(len(results3['predictions']), 4) self.assertSequenceEqual(results1['predictions'][0][0]['keypoints'], - results3['predictions'][2][0]['keypoints']) + results3['predictions'][3][0]['keypoints']) # `inputs` is path to a video inputs = 'tests/data/posetrack18/videos/000001_mpiinew_test/' \ diff 
--git a/tests/test_apis/test_inferencers/test_pose2d_inferencer.py b/tests/test_apis/test_inferencers/test_pose2d_inferencer.py index 86389ba6df..06f0ceafe1 100644 --- a/tests/test_apis/test_inferencers/test_pose2d_inferencer.py +++ b/tests/test_apis/test_inferencers/test_pose2d_inferencer.py @@ -106,7 +106,7 @@ def test_call(self): self.assertEqual(len(os.listdir(f'{tmp_dir}/predictions')), 4) self.assertEqual(len(results3['predictions']), 4) self.assertSequenceEqual(results1['predictions'][0][0]['keypoints'], - results3['predictions'][2][0]['keypoints']) + results3['predictions'][3][0]['keypoints']) # `inputs` is path to a video inputs = 'tests/data/posetrack18/videos/000001_mpiinew_test/' \ From c1b61632a19f2d7ea053a62fb139ada04e8d69d0 Mon Sep 17 00:00:00 2001 From: lupeng Date: Mon, 20 Feb 2023 14:23:33 +0800 Subject: [PATCH 07/17] merge arg `instance_type` into `det_model` --- demo/inferencer_demo.py | 12 +++-------- mmpose/apis/inferencers/mmpose_inferencer.py | 10 ++------- mmpose/apis/inferencers/pose2d_inferencer.py | 21 +++++-------------- .../test_pose2d_inferencer.py | 5 ----- 4 files changed, 10 insertions(+), 38 deletions(-) diff --git a/demo/inferencer_demo.py b/demo/inferencer_demo.py index 5622d05b7e..df4877e6d9 100644 --- a/demo/inferencer_demo.py +++ b/demo/inferencer_demo.py @@ -21,17 +21,11 @@ def parse_args(): help='Path to the custom checkpoint file of the selected pose model. ' 'If it is not specified and "pose2d" is a model name of metafile, ' 'the weights will be loaded from metafile.') - parser.add_argument( - '--instance-type', - type=str, - default=None, - help='The name of the target instances, such as' - '"human", "hand", "animal", and etc.') parser.add_argument( '--det-model', type=str, default=None, - help='Path to the config of detection model.') + help='Config path or alias of detection model.') parser.add_argument( '--det-weights', type=str, @@ -89,8 +83,8 @@ def parse_args(): call_args = vars(parser.parse_args()) init_kws = [ - 'pose2d', 'pose2d_weights', 'device', 'instance_type', 'det_model', - 'det_weights', 'det_cat_ids' + 'pose2d', 'pose2d_weights', 'device', 'det_model', 'det_weights', + 'det_cat_ids' ] init_args = {} for init_kw in init_kws: diff --git a/mmpose/apis/inferencers/mmpose_inferencer.py b/mmpose/apis/inferencers/mmpose_inferencer.py index eb91b85508..f5b23fb125 100644 --- a/mmpose/apis/inferencers/mmpose_inferencer.py +++ b/mmpose/apis/inferencers/mmpose_inferencer.py @@ -44,11 +44,7 @@ class MMPoseInferencer(BaseMMPoseInferencer): device (str, optional): Device to run inference. If None, the available device will be automatically used. Defaults to None. scope (str, optional): The scope of the model. Defaults to "mmpose". - instance_type (str, optional): The name of the instances, such as - "human", "hand", "animal", and etc. This argument serve as - aliases for the detection models to be used in top-down - pose2d methods. Defaults to None. - det_model(str, optional): Path to the config of detection model. + det_model(str, optional): Config path or alias of detection model. Defaults to None. det_weights(str, optional): Path to the checkpoints of detection model. Defaults to None. 
@@ -74,7 +70,6 @@ def __init__(self, pose2d_weights: Optional[str] = None, device: Optional[str] = None, scope: str = 'mmpose', - instance_type: Optional[str] = None, det_model: Optional[Union[ModelType, str]] = None, det_weights: Optional[str] = None, det_cat_ids: Optional[Union[int, List]] = None) -> None: @@ -85,8 +80,7 @@ def __init__(self, self.visualizer = None if pose2d is not None: self.pose2d_inferencer = Pose2DInferencer(pose2d, pose2d_weights, - device, scope, - instance_type, det_model, + device, scope, det_model, det_weights, det_cat_ids) self.mode = 'pose2d' diff --git a/mmpose/apis/inferencers/pose2d_inferencer.py b/mmpose/apis/inferencers/pose2d_inferencer.py index 869764769b..30ebd7c711 100644 --- a/mmpose/apis/inferencers/pose2d_inferencer.py +++ b/mmpose/apis/inferencers/pose2d_inferencer.py @@ -47,11 +47,7 @@ class Pose2DInferencer(BaseMMPoseInferencer): device (str, optional): Device to run inference. If None, the available device will be automatically used. Defaults to None. scope (str, optional): The scope of the model. Defaults to "mmpose". - instance_type (str, optional): The name of the instances, such as - "human", "hand", "animal", and etc. This argument serve as - aliases for the detection models to be used in top-down - approaches. Defaults to None. - det_model(str, optional): Path to the config of detection model. + det_model(str, optional): Config path or alias of detection model. Defaults to None. det_weights(str, optional): Path to the checkpoints of detection model. Defaults to None. @@ -77,7 +73,6 @@ def __init__(self, weights: Optional[str] = None, device: Optional[str] = None, scope: Optional[str] = 'mmpose', - instance_type: Optional[str] = None, det_model: Optional[Union[ModelType, str]] = None, det_weights: Optional[str] = None, det_cat_ids: Optional[Union[int, Tuple]] = None) -> None: @@ -92,16 +87,10 @@ def __init__(self, # initialize detector for top-down models if self.cfg.data_mode == 'topdown': if det_model is None: - if instance_type is None: - instance_type = DATASETS.get( - self.cfg.dataset_type).__module__.split( - 'datasets.')[-1].split('.')[0] - if instance_type not in default_det_models: - raise ValueError( - f'detector for {instance_type} has not been ' - 'provided. 
You need to specify `det_model`, ' - '`det_weights` and `det_cat_ids` manually') - det_info = default_det_models[instance_type.lower()] + det_model = DATASETS.get( + self.cfg.dataset_type).__module__.split( + 'datasets.')[-1].split('.')[0].lower() + det_info = default_det_models[det_model] det_model, det_weights, det_cat_ids = det_info[ 'model'], det_info['weights'], det_info['cat_ids'] diff --git a/tests/test_apis/test_inferencers/test_pose2d_inferencer.py b/tests/test_apis/test_inferencers/test_pose2d_inferencer.py index 06f0ceafe1..f402d05d19 100644 --- a/tests/test_apis/test_inferencers/test_pose2d_inferencer.py +++ b/tests/test_apis/test_inferencers/test_pose2d_inferencer.py @@ -57,11 +57,6 @@ def _test_init(self): self.assertIsInstance(inferencer.model, torch.nn.Module) self.assertFalse(hasattr(inferencer, 'detector')) - # init with incorrect `instance_type` - with self.assertRaises(ValueError): - inferencer = Pose2DInferencer( - model='animal', instance_type='aminal') - def test_call(self): # top-down model From 77a963fafdf365ab2fedbd31db36eb4d56cdefdd Mon Sep 17 00:00:00 2001 From: lupeng Date: Mon, 20 Feb 2023 14:25:07 +0800 Subject: [PATCH 08/17] add doc for inferencer --- docs/en/user_guides/inference.md | 139 +++++++++++++++++++++---------- 1 file changed, 93 insertions(+), 46 deletions(-) diff --git a/docs/en/user_guides/inference.md b/docs/en/user_guides/inference.md index b247d819fb..d33aed0248 100644 --- a/docs/en/user_guides/inference.md +++ b/docs/en/user_guides/inference.md @@ -11,65 +11,112 @@ To start with, we recommend HRNet model with [this configuration file](/configs/ ## High-level APIs for inference -MMPose provides high-level Python APIs for inference on a given image: +MMPose offers a comprehensive high-level API for inference, known as `MMPoseInferencer`. This API enables users to perform inference on both images and videos using all the models supported by MMPose. Furthermore, the API provides automatic visualization of inference results and allows for the convenient saving of predictions. -- [init_model](/mmpose/apis/inference.py#L64): Initialize a model with a config and checkpoint -- [inference_topdown](/mmpose/apis/inference.py#L124): Conduct inference with the top-down pose estimator on a given image - -Here is an example of building the model and inference on a given image using the pre-trained checkpoint of HRNet model on COCO dataset. +Here is an example of inference on a given image using the pre-trained human pose estimator. ```python -from mmpose.apis import inference_topdown, init_model -from mmpose.utils import register_all_modules +from mmpose.apis import MMPoseInferencer -config_path = 'configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-256x192.py' -checkpoint_path = 'https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192-c78dce93_20200708.pth' # can be a local path img_path = 'tests/data/coco/000000000785.jpg' # you can specify your own picture path -# register all modules and set mmpose as the default scope. 
-register_all_modules() -# build the model from a config file and a checkpoint file -model = init_model(config_path, checkpoint_path, device="cpu") # device can be 'cuda:0' -# test a single image -result = inference_topdown(model, img_path)[0] +# build the inferencer with model alias +inferencer = MMPoseInferencer('human') + +# The MMPoseInferencer API utilizes a lazy inference strategy, whereby it generates a prediction generator when provided with input +result_generator = inferencer(img_path, show=True) +result = next(result_generator) +``` + +If everything works fine, you will see the following image in a new window. +![inferencer_result_coco](https://user-images.githubusercontent.com/26127467/220008302-4a57fd44-0978-408e-8351-600e5513316a.jpg) + +The variable `result` is a dictionary that contains two keys, `'visualization'` and `'predictions'`. The key `'visualization'` is intended to contain the visualization results. However, as the `return_vis` argument was not specified, this list remains blank. On the other hand, the key `'predictions'` is a list that contains the estimated keypoints for each individual instance. + +### CLI tool + +A command-line interface (CLI) tool for the inferencer is also available: `demo/inferencer_demo.py`. This tool enables users to perform inference with the same model and inputs using the following command: + +```bash +python demo/inferencer_demo.py 'tests/data/coco/000000000785.jpg' --pose2d 'human' --show --pred-out-dir 'predictions' +``` + +The predictions will be save in `predictions/000000000785.json`. + +### Custom pose estimation models +The inferencer provides several methods that can be used to customize the models employed: + +```python + +# build the inferencer with model alias +# the available aliases include 'human', 'hand', 'face' and 'animal' +inferencer = MMPoseInferencer('human') + +# build the inferencer with model config name +inferencer = MMPoseInferencer('td-hm_hrnet-w32_8xb64-210e_coco-256x192') + +# build the inferencer with model config path and checkpoint path/URL +inferencer = MMPoseInferencer( + pose2d='configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-256x192.py', + pose2d_weights='https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192-c78dce93_20200708.pth' +) ``` -`result` is a `PoseDataSample` containing `gt_instances` and `pred_instances`. And `pred_instances` contains the prediction results, usually containing `keypoints`, `keypoint_scores`. The content of `result.pred_instances` is as follows: +In addition, top-down pose estimators also require an object detection model. The inferencer is capable of inferring the instance type for models trained with datasets supported in MMPose, and subsequently constructing the necessary object detection model. 
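When `det_model` is left unset for a top-down model, the alias is derived from the training dataset's module path and looked up in a table of default detectors (this is the behaviour introduced in the `pose2d_inferencer.py` change above). A minimal sketch of that lookup, where the table entries are illustrative assumptions rather than the defaults actually shipped with MMPose:

```python
# Minimal sketch of the default-detector lookup, not the actual MMPose code.
# The table entries below are illustrative assumptions.
default_det_models = {
    'body': dict(model='rtmdet-m_coco', weights=None, cat_ids=(0, )),
    'face': dict(model='yolox-s_coco-face', weights=None, cat_ids=(0, )),
    'animal': dict(model='rtmdet-m_coco', weights=None, cat_ids=(15, 16, 17)),
}


def resolve_detector(dataset_module, det_model=None):
    """Derive a detector alias from the dataset module path when unset.

    e.g. 'mmpose.datasets.datasets.body.coco_dataset' -> 'body'
    """
    if det_model is None:
        det_model = dataset_module.split('datasets.')[-1].split('.')[0].lower()
    info = default_det_models[det_model]
    return info['model'], info['weights'], info['cat_ids']


# e.g. a face dataset resolves to the face detector entry
print(resolve_detector('mmpose.datasets.datasets.face.wflw_dataset'))
```
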
Alternatively, users may also manually specify the detection model using the following methods: ```python - + +# specify detection model by alias +# the available aliases include 'human', 'hand', 'face', 'animal', as well as any additional aliases defined in mmdet +inferencer = MMPoseInferencer( + # suppose the pose estimator is trained on custom dataset + pose2d='custom_human_pose_estimator.py', + pose2d_weights='custom_human_pose_estimator.pth', + det_model='human' +) + +# specify detection model with model config name +inferencer = MMPoseInferencer( + pose2d='human', + det_model='yolox_l_8x8_300e_coco', + det_cat_ids=[0], # the category id of 'human' class +) + +# specify detection model with config path and checkpoint path/URL +inferencer = MMPoseInferencer( + pose2d='human', + det_model=f'{PATH_TO_MMDET}/configs/yolox/yolox_l_8x8_300e_coco.py', + det_weights='https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_l_8x8_300e_coco/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth', + det_cat_ids=[0], # the category id of 'human' class +) ``` -An image demo can be found in [demo/image_demo.py](/demo/image_demo.py). +### Input format + +The inferencer is capable of processing a range of input types, which includes the following: + +- A path to an image +- A path to a video +- A path to a folder (which will cause all images in that folder to be inferred) +- An image array +- A list of image arrays +- A webcam (in which case the `input` parameter should be set to either `'webcam'` or `'webcam:{CAMERA_ID}'`) + +### Output settings + +The inferencer is capable of both visualizing and saving predictions. The relevant arguments are as follows: + +| Argument | Description | +| ------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------- | +| `show` | Determines whether the image or video should be displayed in a pop-up window. | +| `radius` | Sets the keypoint radius for visualization. | +| `thickness` | Sets the link thickness for visualization. | +| `return_vis` | Determines whether visualization images should be included in the results. | +| `vis_out_dir` | Specifies the folder path for saving the visualization images. If not set, the visualization images will not be saved. | +| `return_datasample` | Determines whether to return the prediction in the format of `PoseDataSample`. | +| `pred_out_dir` | Specifies the folder path for saving the predictions. If not set, the predictions will not be saved. | +| `out_dir` | If `vis_out_dir` or `pred_out_dir` is not set, the values will be set to `f'{out_dir}/visualization'` or `f'{out_dir}/predictions'`, respectively. 
| ## Demos From 922f76eed2d2389ab1e1902034a77cdb0e95a82c Mon Sep 17 00:00:00 2001 From: lupeng Date: Mon, 20 Feb 2023 14:30:20 +0800 Subject: [PATCH 09/17] refine doc --- docs/en/user_guides/inference.md | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/docs/en/user_guides/inference.md b/docs/en/user_guides/inference.md index d33aed0248..09eb2d2213 100644 --- a/docs/en/user_guides/inference.md +++ b/docs/en/user_guides/inference.md @@ -23,7 +23,8 @@ img_path = 'tests/data/coco/000000000785.jpg' # you can specify your own pictu # build the inferencer with model alias inferencer = MMPoseInferencer('human') -# The MMPoseInferencer API utilizes a lazy inference strategy, whereby it generates a prediction generator when provided with input +# The MMPoseInferencer API utilizes a lazy inference strategy, +# whereby it generates a prediction generator when provided with input result_generator = inferencer(img_path, show=True) result = next(result_generator) ``` @@ -38,7 +39,8 @@ The variable `result` is a dictionary that contains two keys, `'visualization'` A command-line interface (CLI) tool for the inferencer is also available: `demo/inferencer_demo.py`. This tool enables users to perform inference with the same model and inputs using the following command: ```bash -python demo/inferencer_demo.py 'tests/data/coco/000000000785.jpg' --pose2d 'human' --show --pred-out-dir 'predictions' +python demo/inferencer_demo.py 'tests/data/coco/000000000785.jpg' \ + --pose2d 'human' --show --pred-out-dir 'predictions' ``` The predictions will be save in `predictions/000000000785.json`. @@ -58,8 +60,10 @@ inferencer = MMPoseInferencer('td-hm_hrnet-w32_8xb64-210e_coco-256x192') # build the inferencer with model config path and checkpoint path/URL inferencer = MMPoseInferencer( - pose2d='configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-256x192.py', - pose2d_weights='https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192-c78dce93_20200708.pth' + pose2d='configs/body_2d_keypoint/topdown_heatmap/coco/' \ + 'td-hm_hrnet-w32_8xb64-210e_coco-256x192.py', + pose2d_weights='https://download.openmmlab.com/mmpose/top_down/' \ + 'hrnet/hrnet_w32_coco_256x192-c78dce93_20200708.pth' ) ``` @@ -68,7 +72,8 @@ In addition, top-down pose estimators also require an object detection model. 
Th ```python # specify detection model by alias -# the available aliases include 'human', 'hand', 'face', 'animal', as well as any additional aliases defined in mmdet +# the available aliases include 'human', 'hand', 'face', 'animal', +# as well as any additional aliases defined in mmdet inferencer = MMPoseInferencer( # suppose the pose estimator is trained on custom dataset pose2d='custom_human_pose_estimator.py', @@ -87,7 +92,9 @@ inferencer = MMPoseInferencer( inferencer = MMPoseInferencer( pose2d='human', det_model=f'{PATH_TO_MMDET}/configs/yolox/yolox_l_8x8_300e_coco.py', - det_weights='https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_l_8x8_300e_coco/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth', + det_weights='https://download.openmmlab.com/mmdetection/v2.0/' + 'yolox/yolox_l_8x8_300e_coco/' \ + 'yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth', det_cat_ids=[0], # the category id of 'human' class ) ``` From 66b3e96b5e626bd077a80272976e3853f3faed7c Mon Sep 17 00:00:00 2001 From: lupeng Date: Mon, 20 Feb 2023 14:31:30 +0800 Subject: [PATCH 10/17] refine doc --- docs/en/user_guides/inference.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/user_guides/inference.md b/docs/en/user_guides/inference.md index 09eb2d2213..cb1a23ccd8 100644 --- a/docs/en/user_guides/inference.md +++ b/docs/en/user_guides/inference.md @@ -92,7 +92,7 @@ inferencer = MMPoseInferencer( inferencer = MMPoseInferencer( pose2d='human', det_model=f'{PATH_TO_MMDET}/configs/yolox/yolox_l_8x8_300e_coco.py', - det_weights='https://download.openmmlab.com/mmdetection/v2.0/' + det_weights='https://download.openmmlab.com/mmdetection/v2.0/' \ 'yolox/yolox_l_8x8_300e_coco/' \ 'yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth', det_cat_ids=[0], # the category id of 'human' class From 29139dadd4bcfa474741743500fb23ac229c5d96 Mon Sep 17 00:00:00 2001 From: lupeng Date: Mon, 20 Feb 2023 14:56:54 +0800 Subject: [PATCH 11/17] fix ut --- mmpose/apis/inferencers/base_mmpose_inferencer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mmpose/apis/inferencers/base_mmpose_inferencer.py b/mmpose/apis/inferencers/base_mmpose_inferencer.py index 92df49764c..d99dcc1b68 100644 --- a/mmpose/apis/inferencers/base_mmpose_inferencer.py +++ b/mmpose/apis/inferencers/base_mmpose_inferencer.py @@ -118,7 +118,7 @@ def _inputs_to_list(self, inputs: InputsType) -> list: '/')[0] if input_type == 'image': inputs.append(filepath) - inputs = inputs + inputs.sort() else: # if inputs is a path to a video file, it will be converted # to a list containing separated frame filenames From 8cfc05256b94f895892311f17b80067475289fca Mon Sep 17 00:00:00 2001 From: lupeng Date: Mon, 20 Feb 2023 21:59:39 +0800 Subject: [PATCH 12/17] restore document for high-level inference APIs --- docs/en/user_guides/inference.md | 66 +++++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 2 deletions(-) diff --git a/docs/en/user_guides/inference.md b/docs/en/user_guides/inference.md index cb1a23ccd8..e9af2adee1 100644 --- a/docs/en/user_guides/inference.md +++ b/docs/en/user_guides/inference.md @@ -9,9 +9,9 @@ In MMPose, a model is defined by a configuration file and existing model paramet To start with, we recommend HRNet model with [this configuration file](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-256x192.py) and [this checkpoint file](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192-c78dce93_20200708.pth). 
It is recommended to download the checkpoint file to `checkpoints` directory. -## High-level APIs for inference +## Out-of-the-box inferencer -MMPose offers a comprehensive high-level API for inference, known as `MMPoseInferencer`. This API enables users to perform inference on both images and videos using all the models supported by MMPose. Furthermore, the API provides automatic visualization of inference results and allows for the convenient saving of predictions. +MMPose offers a comprehensive API for inference, known as `MMPoseInferencer`. This API enables users to perform inference on both images and videos using all the models supported by MMPose. Furthermore, the API provides automatic visualization of inference results and allows for the convenient saving of predictions. Here is an example of inference on a given image using the pre-trained human pose estimator. @@ -125,6 +125,68 @@ The inferencer is capable of both visualizing and saving predictions. The releva | `pred_out_dir` | Specifies the folder path for saving the predictions. If not set, the predictions will not be saved. | | `out_dir` | If `vis_out_dir` or `pred_out_dir` is not set, the values will be set to `f'{out_dir}/visualization'` or `f'{out_dir}/predictions'`, respectively. | +## High-level APIs for inference + +MMPose provides high-level Python APIs for inference on a given image: + +- [init_model](/mmpose/apis/inference.py#L64): Initialize a model with a config and checkpoint +- [inference_topdown](/mmpose/apis/inference.py#L124): Conduct inference with the top-down pose estimator on a given image + +Here is an example of building the model and inference on a given image using the pre-trained checkpoint of HRNet model on COCO dataset. + +```python +from mmpose.apis import inference_topdown, init_model +from mmpose.utils import register_all_modules + +config_path = 'configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-256x192.py' +checkpoint_path = 'https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192-c78dce93_20200708.pth' # can be a local path +img_path = 'tests/data/coco/000000000785.jpg' # you can specify your own picture path + +# register all modules and set mmpose as the default scope. +register_all_modules() +# build the model from a config file and a checkpoint file +model = init_model(config_path, checkpoint_path, device="cpu") # device can be 'cuda:0' +# test a single image +result = inference_topdown(model, img_path)[0] + +``` + +`result` is a `PoseDataSample` containing `gt_instances` and `pred_instances`. And `pred_instances` contains the prediction results, usually containing `keypoints`, `keypoint_scores`. The content of `result.pred_instances` is as follows: + +```python + +``` + +An image demo can be found in [demo/image_demo.py](/demo/image_demo.py). + ## Demos We also provide demo scripts, implemented with high-level APIs and supporting various tasks. Source codes are available [here](/demo). 
You can refer to the [docs](/demo/docs) for detail descriptions From 015089b785521f4eedabd3b6be3de02b21b58c7b Mon Sep 17 00:00:00 2001 From: lupeng Date: Wed, 22 Feb 2023 11:18:13 +0800 Subject: [PATCH 13/17] fix typo --- mmpose/registry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mmpose/registry.py b/mmpose/registry.py index f0e46a315f..f1c080565f 100644 --- a/mmpose/registry.py +++ b/mmpose/registry.py @@ -131,4 +131,4 @@ INFERENCERS = Registry( 'inferencer', parent=MMENGINE_INFERENCERS, - locations=['mmaction.apis.inferencers']) + locations=['mmpose.apis.inferencers']) From 702c9f3d909a29c1fdcf7170bf05066b402a60bd Mon Sep 17 00:00:00 2001 From: lupeng Date: Wed, 22 Feb 2023 13:25:18 +0800 Subject: [PATCH 14/17] update face demo to use mmdet --- demo/docs/2d_face_demo.md | 29 +++-- demo/docs/mmdet_modelzoo.md | 10 ++ demo/topdown_face_demo.py | 218 ------------------------------------ 3 files changed, 27 insertions(+), 230 deletions(-) delete mode 100644 demo/topdown_face_demo.py diff --git a/demo/docs/2d_face_demo.md b/demo/docs/2d_face_demo.md index 13ff380b69..e1fce81eb8 100644 --- a/demo/docs/2d_face_demo.md +++ b/demo/docs/2d_face_demo.md @@ -1,29 +1,28 @@ ## 2D Face Keypoint Demo -We provide a demo script to test a single image or video with face detectors and top-down pose estimators, Please install `face_recognition` before running the demo, by: +We provide a demo script to test a single image or video with hand detectors and top-down pose estimators. Assume that you have already installed [mmdet](https://github.com/open-mmlab/mmdetection) with version >= 3.0. -``` -pip install face_recognition -``` - -For more details, please refer to [face_recognition](https://github.com/ageitgey/face_recognition). +**Face Box Model Preparation:** The pre-trained face box estimation model can be found in [mmdet model zoo](/demo/docs/mmdet_modelzoo.md). ### 2D Face Image Demo ```shell -python demo/topdown_face_demo.py \ +python demo/topdown_demo_with_mmdet.py \ + ${MMDET_CONFIG_FILE} ${MMDET_CHECKPOINT_FILE} \ ${MMPOSE_CONFIG_FILE} ${MMPOSE_CHECKPOINT_FILE} \ --input ${INPUT_PATH} [--output-root ${OUTPUT_DIR}] \ [--show] [--device ${GPU_ID or CPU}] [--save-predictions] \ [--draw-heatmap ${DRAW_HEATMAP}] [--radius ${KPT_RADIUS}] \ - [--kpt-thr ${KPT_SCORE_THR}] + [--kpt-thr ${KPT_SCORE_THR}] [--bbox-thr ${BBOX_SCORE_THR}] ``` The pre-trained face keypoint estimation models can be found from [model zoo](https://mmpose.readthedocs.io/en/1.x/model_zoo/face_2d_keypoint.html). 
Take [aflw model](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_aflw_256x256-f2bbc62b_20210125.pth) as an example: ```shell -python demo/topdown_face_demo.py \ +python demo/topdown_demo_with_mmdet.py \ + demo/mmdetection_cfg/yolox-s_8xb8-300e_coco-face.py \ + https://download.openmmlab.com/mmpose/mmdet_pretrained/yolo-x_8xb8-300e_coco-face_13274d7c.pth \ configs/face_2d_keypoint/topdown_heatmap/aflw/td-hm_hrnetv2-w18_8xb64-60e_aflw-256x256.py \ https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_aflw_256x256-f2bbc62b_20210125.pth \ --input tests/data/cofw/001766.jpg \ @@ -39,7 +38,9 @@ If you use a heatmap-based model and set argument `--draw-heatmap`, the predicte To save visualized results on disk: ```shell -python demo/topdown_face_demo.py \ +python demo/topdown_demo_with_mmdet.py \ + demo/mmdetection_cfg/yolox-s_8xb8-300e_coco-face.py \ + https://download.openmmlab.com/mmpose/mmdet_pretrained/yolo-x_8xb8-300e_coco-face_13274d7c.pth \ configs/face_2d_keypoint/topdown_heatmap/aflw/td-hm_hrnetv2-w18_8xb64-60e_aflw-256x256.py \ https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_aflw_256x256-f2bbc62b_20210125.pth \ --input tests/data/cofw/001766.jpg \ @@ -51,7 +52,9 @@ To save the predicted results on disk, please specify `--save-predictions`. To run demos on CPU: ```shell -python demo/topdown_face_demo.py \ +python demo/topdown_demo_with_mmdet.py \ + demo/mmdetection_cfg/yolox-s_8xb8-300e_coco-face.py \ + https://download.openmmlab.com/mmpose/mmdet_pretrained/yolo-x_8xb8-300e_coco-face_13274d7c.pth \ configs/face_2d_keypoint/topdown_heatmap/aflw/td-hm_hrnetv2-w18_8xb64-60e_aflw-256x256.py \ https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_aflw_256x256-f2bbc62b_20210125.pth \ --input tests/data/cofw/001766.jpg \ @@ -63,7 +66,9 @@ python demo/topdown_face_demo.py \ Videos share the same interface with images. The difference is that the `${INPUT_PATH}` for videos can be the local path or **URL** link to video file. ```shell -python demo/topdown_face_demo.py \ +python demo/topdown_demo_with_mmdet.py \ + demo/mmdetection_cfg/yolox-s_8xb8-300e_coco-face.py \ + https://download.openmmlab.com/mmpose/mmdet_pretrained/yolo-x_8xb8-300e_coco-face_13274d7c.pth \ configs/face_2d_keypoint/topdown_heatmap/aflw/td-hm_hrnetv2-w18_8xb64-60e_aflw-256x256.py \ https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_aflw_256x256-f2bbc62b_20210125.pth \ --input demo/resources/ \ diff --git a/demo/docs/mmdet_modelzoo.md b/demo/docs/mmdet_modelzoo.md index d438a5e982..a50be168a5 100644 --- a/demo/docs/mmdet_modelzoo.md +++ b/demo/docs/mmdet_modelzoo.md @@ -15,6 +15,16 @@ For hand bounding box detection, we simply train our hand box models on onehand1 | :---------------------------------------------------------------- | :----: | :---------------------------------------------------------------: | :--------------------------------------------------------------: | | [Cascade_R-CNN X-101-64x4d-FPN-1class](/demo/mmdetection_cfg/cascade_rcnn_x101_64x4d_fpn_1class.py) | 0.817 | [ckpt](https://download.openmmlab.com/mmpose/mmdet_pretrained/cascade_rcnn_x101_64x4d_fpn_20e_onehand10k-dac19597_20201030.pth) | [log](https://download.openmmlab.com/mmpose/mmdet_pretrained/cascade_rcnn_x101_64x4d_fpn_20e_onehand10k_20201030.log.json) | +### Face Bounding Box Detection Models + +For face bounding box detection, we train a YOLOX detector on COCO-face data using MMDetection. 
+ +#### Hand detection results on OneHand10K test set + +| Arch | Box AP | ckpt | +| :-------------------------------------------------------------- | :----: | :----------------------------------------------------------------------------------------------------: | +| [YOLOX-s](/demo/mmdetection_cfg/yolox-s_8xb8-300e_coco-face.py) | 0.408 | [ckpt](https://download.openmmlab.com/mmpose/mmdet_pretrained/yolo-x_8xb8-300e_coco-face_13274d7c.pth) | + ### Animal Bounding Box Detection Models #### COCO animals diff --git a/demo/topdown_face_demo.py b/demo/topdown_face_demo.py deleted file mode 100644 index 3442d098e3..0000000000 --- a/demo/topdown_face_demo.py +++ /dev/null @@ -1,218 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import mimetypes -import os -import tempfile -from argparse import ArgumentParser - -import json_tricks as json -import mmcv -import mmengine -import numpy as np - -from mmpose.apis import inference_topdown -from mmpose.apis import init_model as init_pose_estimator -from mmpose.evaluation.functional import nms -from mmpose.registry import VISUALIZERS -from mmpose.structures import merge_data_samples, split_instances - -try: - import face_recognition - has_face_det = True -except (ImportError, ModuleNotFoundError): - has_face_det = False - - -def process_face_det_results(face_det_results): - """Process det results, and return a list of bboxes. - - :param face_det_results: (top, right, bottom and left) - :return: a list of detected bounding boxes (x,y,x,y)-format - """ - - person_results = [] - for bbox in face_det_results: - # left, top, right, bottom - person_results.append([bbox[3], bbox[0], bbox[1], bbox[2]]) - person_results = np.array(person_results) - - return person_results - - -def process_one_image(args, img_path, pose_estimator, visualizer, - show_interval): - """Visualize predicted keypoints (and heatmaps) of one image.""" - - # predict bbox - image = face_recognition.load_image_file(img_path) - face_det_results = face_recognition.face_locations(image) - bboxes = process_face_det_results(face_det_results) - - bboxes = np.concatenate((bboxes, np.ones((bboxes.shape[0], 1))), axis=1) - bboxes = bboxes[nms(bboxes, args.nms_thr), :4] - - # predict keypoints - pose_results = inference_topdown(pose_estimator, img_path, bboxes) - data_samples = merge_data_samples(pose_results) - - # show the results - img = mmcv.imread(img_path, channel_order='rgb') - - out_file = None - if args.output_root: - out_file = f'{args.output_root}/{os.path.basename(img_path)}' - - visualizer.add_datasample( - 'result', - img, - data_sample=data_samples, - draw_gt=False, - draw_heatmap=args.draw_heatmap, - draw_bbox=args.draw_bbox, - show=args.show, - wait_time=show_interval, - out_file=out_file, - kpt_score_thr=args.kpt_thr) - - return data_samples.pred_instances - - -def main(): - """Visualize the demo images. - - Use `face_recognition` to detect the face. - """ - parser = ArgumentParser() - parser.add_argument('pose_config', help='Config file for pose') - parser.add_argument('pose_checkpoint', help='Checkpoint file for pose') - parser.add_argument( - '--input', type=str, default='', help='Image/Video file') - parser.add_argument( - '--show', - action='store_true', - default=False, - help='whether to show img') - parser.add_argument( - '--output-root', - type=str, - default='', - help='root of the output img file. 
' - 'Default not saving the visualization images.') - parser.add_argument( - '--save-predictions', - action='store_true', - default=False, - help='whether to save predicted results') - parser.add_argument( - '--device', default='cuda:0', help='Device used for inference') - parser.add_argument( - '--nms-thr', - type=float, - default=0.3, - help='IoU threshold for bounding box NMS') - parser.add_argument( - '--kpt-thr', type=float, default=0.3, help='Keypoint score threshold') - parser.add_argument( - '--draw-heatmap', - action='store_true', - default=False, - help='Draw heatmap predicted by the model') - parser.add_argument( - '--radius', - type=int, - default=2, - help='Keypoint radius for visualization') - parser.add_argument( - '--thickness', - type=int, - default=1, - help='Link thickness for visualization') - parser.add_argument( - '--draw-bbox', action='store_true', help='Draw bboxes of instances') - - assert has_face_det, 'Please install face_recognition to run the demo. ' \ - '"pip install face_recognition", For more details, ' \ - 'see https://github.com/ageitgey/face_recognition' - - args = parser.parse_args() - - assert args.show or (args.output_root != '') - assert args.input != '' - if args.output_root: - mmengine.mkdir_or_exist(args.output_root) - if args.save_predictions: - assert args.output_root != '' - args.pred_save_path = f'{args.output_root}/results_' \ - f'{os.path.splitext(os.path.basename(args.input))[0]}.json' - - # build pose estimator - pose_estimator = init_pose_estimator( - args.pose_config, - args.pose_checkpoint, - device=args.device, - cfg_options=dict( - model=dict(test_cfg=dict(output_heatmaps=args.draw_heatmap)))) - - # init visualizer - pose_estimator.cfg.visualizer.radius = args.radius - pose_estimator.cfg.visualizer.line_width = args.thickness - visualizer = VISUALIZERS.build(pose_estimator.cfg.visualizer) - # the dataset_meta is loaded from the checkpoint and - # then pass to the model in init_pose_estimator - visualizer.set_dataset_meta(pose_estimator.dataset_meta) - visualizer.kpt_color = 'red' - - input_type = mimetypes.guess_type(args.input)[0].split('/')[0] - if input_type == 'image': - pred_instances = process_one_image( - args, args.input, pose_estimator, visualizer, show_interval=0) - pred_instances_list = split_instances(pred_instances) - - elif input_type == 'video': - tmp_folder = tempfile.TemporaryDirectory() - video = mmcv.VideoReader(args.input) - progressbar = mmengine.ProgressBar(len(video)) - video.cvt2frames(tmp_folder.name, show_progress=False) - output_root = args.output_root - args.output_root = tmp_folder.name - pred_instances_list = [] - - for frame_id, img_fname in enumerate(os.listdir(tmp_folder.name)): - pred_instances = process_one_image( - args, - f'{tmp_folder.name}/{img_fname}', - pose_estimator, - visualizer, - show_interval=1) - progressbar.update() - pred_instances_list.append( - dict( - frame_id=frame_id, - instances=split_instances(pred_instances))) - - if output_root: - mmcv.frames2video( - tmp_folder.name, - f'{output_root}/{os.path.basename(args.input)}', - fps=video.fps, - fourcc='mp4v', - show_progress=False) - tmp_folder.cleanup() - - else: - args.save_predictions = False - raise ValueError( - f'file {os.path.basename(args.input)} has invalid format.') - - if args.save_predictions: - with open(args.pred_save_path, 'w') as f: - json.dump( - dict( - meta_info=pose_estimator.dataset_meta, - instance_info=pred_instances_list), - f, - indent='\t') - print(f'predictions have been saved at 
{args.pred_save_path}') - - -if __name__ == '__main__': - main() From a413631a35eb538eb2d18ae632f5013c7e920aab Mon Sep 17 00:00:00 2001 From: lupeng Date: Wed, 22 Feb 2023 14:10:26 +0800 Subject: [PATCH 15/17] change face landmark color from white to red --- configs/_base_/datasets/300w.py | 432 +++-------- configs/_base_/datasets/aflw.py | 77 +- .../_base_/datasets/coco_wholebody_face.py | 514 +++---------- configs/_base_/datasets/cofw.py | 135 +--- configs/_base_/datasets/wflw.py | 690 ++++-------------- 5 files changed, 399 insertions(+), 1449 deletions(-) diff --git a/configs/_base_/datasets/300w.py b/configs/_base_/datasets/300w.py index 10c343a2ad..2c3728da1d 100644 --- a/configs/_base_/datasets/300w.py +++ b/configs/_base_/datasets/300w.py @@ -11,373 +11,123 @@ homepage='https://ibug.doc.ic.ac.uk/resources/300-W/', ), keypoint_info={ - 0: - dict( - name='kpt-0', id=0, color=[255, 255, 255], type='', swap='kpt-16'), - 1: - dict( - name='kpt-1', id=1, color=[255, 255, 255], type='', swap='kpt-15'), - 2: - dict( - name='kpt-2', id=2, color=[255, 255, 255], type='', swap='kpt-14'), - 3: - dict( - name='kpt-3', id=3, color=[255, 255, 255], type='', swap='kpt-13'), - 4: - dict( - name='kpt-4', id=4, color=[255, 255, 255], type='', swap='kpt-12'), - 5: - dict( - name='kpt-5', id=5, color=[255, 255, 255], type='', swap='kpt-11'), - 6: - dict( - name='kpt-6', id=6, color=[255, 255, 255], type='', swap='kpt-10'), - 7: - dict(name='kpt-7', id=7, color=[255, 255, 255], type='', swap='kpt-9'), - 8: - dict(name='kpt-8', id=8, color=[255, 255, 255], type='', swap=''), - 9: - dict(name='kpt-9', id=9, color=[255, 255, 255], type='', swap='kpt-7'), + 0: dict(name='kpt-0', id=0, color=[255, 0, 0], type='', swap='kpt-16'), + 1: dict(name='kpt-1', id=1, color=[255, 0, 0], type='', swap='kpt-15'), + 2: dict(name='kpt-2', id=2, color=[255, 0, 0], type='', swap='kpt-14'), + 3: dict(name='kpt-3', id=3, color=[255, 0, 0], type='', swap='kpt-13'), + 4: dict(name='kpt-4', id=4, color=[255, 0, 0], type='', swap='kpt-12'), + 5: dict(name='kpt-5', id=5, color=[255, 0, 0], type='', swap='kpt-11'), + 6: dict(name='kpt-6', id=6, color=[255, 0, 0], type='', swap='kpt-10'), + 7: dict(name='kpt-7', id=7, color=[255, 0, 0], type='', swap='kpt-9'), + 8: dict(name='kpt-8', id=8, color=[255, 0, 0], type='', swap=''), + 9: dict(name='kpt-9', id=9, color=[255, 0, 0], type='', swap='kpt-7'), 10: - dict( - name='kpt-10', id=10, color=[255, 255, 255], type='', - swap='kpt-6'), + dict(name='kpt-10', id=10, color=[255, 0, 0], type='', swap='kpt-6'), 11: - dict( - name='kpt-11', id=11, color=[255, 255, 255], type='', - swap='kpt-5'), + dict(name='kpt-11', id=11, color=[255, 0, 0], type='', swap='kpt-5'), 12: - dict( - name='kpt-12', id=12, color=[255, 255, 255], type='', - swap='kpt-4'), + dict(name='kpt-12', id=12, color=[255, 0, 0], type='', swap='kpt-4'), 13: - dict( - name='kpt-13', id=13, color=[255, 255, 255], type='', - swap='kpt-3'), + dict(name='kpt-13', id=13, color=[255, 0, 0], type='', swap='kpt-3'), 14: - dict( - name='kpt-14', id=14, color=[255, 255, 255], type='', - swap='kpt-2'), + dict(name='kpt-14', id=14, color=[255, 0, 0], type='', swap='kpt-2'), 15: - dict( - name='kpt-15', id=15, color=[255, 255, 255], type='', - swap='kpt-1'), + dict(name='kpt-15', id=15, color=[255, 0, 0], type='', swap='kpt-1'), 16: - dict( - name='kpt-16', id=16, color=[255, 255, 255], type='', - swap='kpt-0'), + dict(name='kpt-16', id=16, color=[255, 0, 0], type='', swap='kpt-0'), 17: - dict( - name='kpt-17', - id=17, - color=[255, 255, 
255], - type='', - swap='kpt-26'), + dict(name='kpt-17', id=17, color=[255, 0, 0], type='', swap='kpt-26'), 18: - dict( - name='kpt-18', - id=18, - color=[255, 255, 255], - type='', - swap='kpt-25'), + dict(name='kpt-18', id=18, color=[255, 0, 0], type='', swap='kpt-25'), 19: - dict( - name='kpt-19', - id=19, - color=[255, 255, 255], - type='', - swap='kpt-24'), + dict(name='kpt-19', id=19, color=[255, 0, 0], type='', swap='kpt-24'), 20: - dict( - name='kpt-20', - id=20, - color=[255, 255, 255], - type='', - swap='kpt-23'), + dict(name='kpt-20', id=20, color=[255, 0, 0], type='', swap='kpt-23'), 21: - dict( - name='kpt-21', - id=21, - color=[255, 255, 255], - type='', - swap='kpt-22'), + dict(name='kpt-21', id=21, color=[255, 0, 0], type='', swap='kpt-22'), 22: - dict( - name='kpt-22', - id=22, - color=[255, 255, 255], - type='', - swap='kpt-21'), + dict(name='kpt-22', id=22, color=[255, 0, 0], type='', swap='kpt-21'), 23: - dict( - name='kpt-23', - id=23, - color=[255, 255, 255], - type='', - swap='kpt-20'), + dict(name='kpt-23', id=23, color=[255, 0, 0], type='', swap='kpt-20'), 24: - dict( - name='kpt-24', - id=24, - color=[255, 255, 255], - type='', - swap='kpt-19'), + dict(name='kpt-24', id=24, color=[255, 0, 0], type='', swap='kpt-19'), 25: - dict( - name='kpt-25', - id=25, - color=[255, 255, 255], - type='', - swap='kpt-18'), + dict(name='kpt-25', id=25, color=[255, 0, 0], type='', swap='kpt-18'), 26: - dict( - name='kpt-26', - id=26, - color=[255, 255, 255], - type='', - swap='kpt-17'), - 27: - dict(name='kpt-27', id=27, color=[255, 255, 255], type='', swap=''), - 28: - dict(name='kpt-28', id=28, color=[255, 255, 255], type='', swap=''), - 29: - dict(name='kpt-29', id=29, color=[255, 255, 255], type='', swap=''), - 30: - dict(name='kpt-30', id=30, color=[255, 255, 255], type='', swap=''), + dict(name='kpt-26', id=26, color=[255, 0, 0], type='', swap='kpt-17'), + 27: dict(name='kpt-27', id=27, color=[255, 0, 0], type='', swap=''), + 28: dict(name='kpt-28', id=28, color=[255, 0, 0], type='', swap=''), + 29: dict(name='kpt-29', id=29, color=[255, 0, 0], type='', swap=''), + 30: dict(name='kpt-30', id=30, color=[255, 0, 0], type='', swap=''), 31: - dict( - name='kpt-31', - id=31, - color=[255, 255, 255], - type='', - swap='kpt-35'), + dict(name='kpt-31', id=31, color=[255, 0, 0], type='', swap='kpt-35'), 32: - dict( - name='kpt-32', - id=32, - color=[255, 255, 255], - type='', - swap='kpt-34'), - 33: - dict(name='kpt-33', id=33, color=[255, 255, 255], type='', swap=''), + dict(name='kpt-32', id=32, color=[255, 0, 0], type='', swap='kpt-34'), + 33: dict(name='kpt-33', id=33, color=[255, 0, 0], type='', swap=''), 34: - dict( - name='kpt-34', - id=34, - color=[255, 255, 255], - type='', - swap='kpt-32'), + dict(name='kpt-34', id=34, color=[255, 0, 0], type='', swap='kpt-32'), 35: - dict( - name='kpt-35', - id=35, - color=[255, 255, 255], - type='', - swap='kpt-31'), + dict(name='kpt-35', id=35, color=[255, 0, 0], type='', swap='kpt-31'), 36: - dict( - name='kpt-36', - id=36, - color=[255, 255, 255], - type='', - swap='kpt-45'), + dict(name='kpt-36', id=36, color=[255, 0, 0], type='', swap='kpt-45'), 37: - dict( - name='kpt-37', - id=37, - color=[255, 255, 255], - type='', - swap='kpt-44'), + dict(name='kpt-37', id=37, color=[255, 0, 0], type='', swap='kpt-44'), 38: - dict( - name='kpt-38', - id=38, - color=[255, 255, 255], - type='', - swap='kpt-43'), + dict(name='kpt-38', id=38, color=[255, 0, 0], type='', swap='kpt-43'), 39: - dict( - name='kpt-39', - id=39, - color=[255, 255, 255], - 
type='', - swap='kpt-42'), + dict(name='kpt-39', id=39, color=[255, 0, 0], type='', swap='kpt-42'), 40: - dict( - name='kpt-40', - id=40, - color=[255, 255, 255], - type='', - swap='kpt-47'), - 41: - dict( - name='kpt-41', - id=41, - color=[255, 255, 255], - type='', - swap='kpt-46'), - 42: - dict( - name='kpt-42', - id=42, - color=[255, 255, 255], - type='', - swap='kpt-39'), - 43: - dict( - name='kpt-43', - id=43, - color=[255, 255, 255], - type='', - swap='kpt-38'), - 44: - dict( - name='kpt-44', - id=44, - color=[255, 255, 255], - type='', - swap='kpt-37'), - 45: - dict( - name='kpt-45', - id=45, - color=[255, 255, 255], - type='', - swap='kpt-36'), - 46: - dict( - name='kpt-46', - id=46, - color=[255, 255, 255], - type='', - swap='kpt-41'), - 47: - dict( - name='kpt-47', - id=47, - color=[255, 255, 255], - type='', - swap='kpt-40'), - 48: - dict( - name='kpt-48', - id=48, - color=[255, 255, 255], - type='', - swap='kpt-54'), - 49: - dict( - name='kpt-49', - id=49, - color=[255, 255, 255], - type='', - swap='kpt-53'), - 50: - dict( - name='kpt-50', - id=50, - color=[255, 255, 255], - type='', - swap='kpt-52'), - 51: - dict(name='kpt-51', id=51, color=[255, 255, 255], type='', swap=''), - 52: - dict( - name='kpt-52', - id=52, - color=[255, 255, 255], - type='', - swap='kpt-50'), - 53: - dict( - name='kpt-53', - id=53, - color=[255, 255, 255], - type='', - swap='kpt-49'), - 54: - dict( - name='kpt-54', - id=54, - color=[255, 255, 255], - type='', - swap='kpt-48'), - 55: - dict( - name='kpt-55', - id=55, - color=[255, 255, 255], - type='', - swap='kpt-59'), - 56: - dict( - name='kpt-56', - id=56, - color=[255, 255, 255], - type='', - swap='kpt-58'), - 57: - dict(name='kpt-57', id=57, color=[255, 255, 255], type='', swap=''), - 58: - dict( - name='kpt-58', - id=58, - color=[255, 255, 255], - type='', - swap='kpt-56'), - 59: - dict( - name='kpt-59', - id=59, - color=[255, 255, 255], - type='', - swap='kpt-55'), - 60: - dict( - name='kpt-60', - id=60, - color=[255, 255, 255], - type='', - swap='kpt-64'), - 61: - dict( - name='kpt-61', - id=61, - color=[255, 255, 255], - type='', - swap='kpt-63'), - 62: - dict(name='kpt-62', id=62, color=[255, 255, 255], type='', swap=''), - 63: - dict( - name='kpt-63', - id=63, - color=[255, 255, 255], - type='', - swap='kpt-61'), - 64: - dict( - name='kpt-64', - id=64, - color=[255, 255, 255], - type='', - swap='kpt-60'), - 65: - dict( - name='kpt-65', - id=65, - color=[255, 255, 255], - type='', - swap='kpt-67'), - 66: - dict(name='kpt-66', id=66, color=[255, 255, 255], type='', swap=''), - 67: - dict( - name='kpt-67', - id=67, - color=[255, 255, 255], - type='', - swap='kpt-65'), + dict(name='kpt-40', id=40, color=[255, 0, 0], type='', swap='kpt-47'), + 41: dict( + name='kpt-41', id=41, color=[255, 0, 0], type='', swap='kpt-46'), + 42: dict( + name='kpt-42', id=42, color=[255, 0, 0], type='', swap='kpt-39'), + 43: dict( + name='kpt-43', id=43, color=[255, 0, 0], type='', swap='kpt-38'), + 44: dict( + name='kpt-44', id=44, color=[255, 0, 0], type='', swap='kpt-37'), + 45: dict( + name='kpt-45', id=45, color=[255, 0, 0], type='', swap='kpt-36'), + 46: dict( + name='kpt-46', id=46, color=[255, 0, 0], type='', swap='kpt-41'), + 47: dict( + name='kpt-47', id=47, color=[255, 0, 0], type='', swap='kpt-40'), + 48: dict( + name='kpt-48', id=48, color=[255, 0, 0], type='', swap='kpt-54'), + 49: dict( + name='kpt-49', id=49, color=[255, 0, 0], type='', swap='kpt-53'), + 50: dict( + name='kpt-50', id=50, color=[255, 0, 0], type='', swap='kpt-52'), + 51: 
dict(name='kpt-51', id=51, color=[255, 0, 0], type='', swap=''), + 52: dict( + name='kpt-52', id=52, color=[255, 0, 0], type='', swap='kpt-50'), + 53: dict( + name='kpt-53', id=53, color=[255, 0, 0], type='', swap='kpt-49'), + 54: dict( + name='kpt-54', id=54, color=[255, 0, 0], type='', swap='kpt-48'), + 55: dict( + name='kpt-55', id=55, color=[255, 0, 0], type='', swap='kpt-59'), + 56: dict( + name='kpt-56', id=56, color=[255, 0, 0], type='', swap='kpt-58'), + 57: dict(name='kpt-57', id=57, color=[255, 0, 0], type='', swap=''), + 58: dict( + name='kpt-58', id=58, color=[255, 0, 0], type='', swap='kpt-56'), + 59: dict( + name='kpt-59', id=59, color=[255, 0, 0], type='', swap='kpt-55'), + 60: dict( + name='kpt-60', id=60, color=[255, 0, 0], type='', swap='kpt-64'), + 61: dict( + name='kpt-61', id=61, color=[255, 0, 0], type='', swap='kpt-63'), + 62: dict(name='kpt-62', id=62, color=[255, 0, 0], type='', swap=''), + 63: dict( + name='kpt-63', id=63, color=[255, 0, 0], type='', swap='kpt-61'), + 64: dict( + name='kpt-64', id=64, color=[255, 0, 0], type='', swap='kpt-60'), + 65: dict( + name='kpt-65', id=65, color=[255, 0, 0], type='', swap='kpt-67'), + 66: dict(name='kpt-66', id=66, color=[255, 0, 0], type='', swap=''), + 67: dict( + name='kpt-67', id=67, color=[255, 0, 0], type='', swap='kpt-65'), }, skeleton_info={}, joint_weights=[1.] * 68, diff --git a/configs/_base_/datasets/aflw.py b/configs/_base_/datasets/aflw.py index bf534cbb75..cf5e10964d 100644 --- a/configs/_base_/datasets/aflw.py +++ b/configs/_base_/datasets/aflw.py @@ -13,70 +13,31 @@ 'team-bischof/lrs/downloads/aflw/', ), keypoint_info={ - 0: - dict(name='kpt-0', id=0, color=[255, 255, 255], type='', swap='kpt-5'), - 1: - dict(name='kpt-1', id=1, color=[255, 255, 255], type='', swap='kpt-4'), - 2: - dict(name='kpt-2', id=2, color=[255, 255, 255], type='', swap='kpt-3'), - 3: - dict(name='kpt-3', id=3, color=[255, 255, 255], type='', swap='kpt-2'), - 4: - dict(name='kpt-4', id=4, color=[255, 255, 255], type='', swap='kpt-1'), - 5: - dict(name='kpt-5', id=5, color=[255, 255, 255], type='', swap='kpt-0'), - 6: - dict( - name='kpt-6', id=6, color=[255, 255, 255], type='', swap='kpt-11'), - 7: - dict( - name='kpt-7', id=7, color=[255, 255, 255], type='', swap='kpt-10'), - 8: - dict(name='kpt-8', id=8, color=[255, 255, 255], type='', swap='kpt-9'), - 9: - dict(name='kpt-9', id=9, color=[255, 255, 255], type='', swap='kpt-8'), + 0: dict(name='kpt-0', id=0, color=[255, 0, 0], type='', swap='kpt-5'), + 1: dict(name='kpt-1', id=1, color=[255, 0, 0], type='', swap='kpt-4'), + 2: dict(name='kpt-2', id=2, color=[255, 0, 0], type='', swap='kpt-3'), + 3: dict(name='kpt-3', id=3, color=[255, 0, 0], type='', swap='kpt-2'), + 4: dict(name='kpt-4', id=4, color=[255, 0, 0], type='', swap='kpt-1'), + 5: dict(name='kpt-5', id=5, color=[255, 0, 0], type='', swap='kpt-0'), + 6: dict(name='kpt-6', id=6, color=[255, 0, 0], type='', swap='kpt-11'), + 7: dict(name='kpt-7', id=7, color=[255, 0, 0], type='', swap='kpt-10'), + 8: dict(name='kpt-8', id=8, color=[255, 0, 0], type='', swap='kpt-9'), + 9: dict(name='kpt-9', id=9, color=[255, 0, 0], type='', swap='kpt-8'), 10: - dict( - name='kpt-10', id=10, color=[255, 255, 255], type='', - swap='kpt-7'), + dict(name='kpt-10', id=10, color=[255, 0, 0], type='', swap='kpt-7'), 11: - dict( - name='kpt-11', id=11, color=[255, 255, 255], type='', - swap='kpt-6'), + dict(name='kpt-11', id=11, color=[255, 0, 0], type='', swap='kpt-6'), 12: - dict( - name='kpt-12', - id=12, - color=[255, 255, 255], - type='', - 
swap='kpt-14'), - 13: - dict(name='kpt-13', id=13, color=[255, 255, 255], type='', swap=''), + dict(name='kpt-12', id=12, color=[255, 0, 0], type='', swap='kpt-14'), + 13: dict(name='kpt-13', id=13, color=[255, 0, 0], type='', swap=''), 14: - dict( - name='kpt-14', - id=14, - color=[255, 255, 255], - type='', - swap='kpt-12'), + dict(name='kpt-14', id=14, color=[255, 0, 0], type='', swap='kpt-12'), 15: - dict( - name='kpt-15', - id=15, - color=[255, 255, 255], - type='', - swap='kpt-17'), - 16: - dict(name='kpt-16', id=16, color=[255, 255, 255], type='', swap=''), + dict(name='kpt-15', id=15, color=[255, 0, 0], type='', swap='kpt-17'), + 16: dict(name='kpt-16', id=16, color=[255, 0, 0], type='', swap=''), 17: - dict( - name='kpt-17', - id=17, - color=[255, 255, 255], - type='', - swap='kpt-15'), - 18: - dict(name='kpt-18', id=18, color=[255, 255, 255], type='', swap='') + dict(name='kpt-17', id=17, color=[255, 0, 0], type='', swap='kpt-15'), + 18: dict(name='kpt-18', id=18, color=[255, 0, 0], type='', swap='') }, skeleton_info={}, joint_weights=[1.] * 19, diff --git a/configs/_base_/datasets/coco_wholebody_face.py b/configs/_base_/datasets/coco_wholebody_face.py index 7c9ee3350e..a3fe1e5b33 100644 --- a/configs/_base_/datasets/coco_wholebody_face.py +++ b/configs/_base_/datasets/coco_wholebody_face.py @@ -12,425 +12,131 @@ ), keypoint_info={ 0: - dict( - name='face-0', - id=0, - color=[255, 255, 255], - type='', - swap='face-16'), + dict(name='face-0', id=0, color=[255, 0, 0], type='', swap='face-16'), 1: - dict( - name='face-1', - id=1, - color=[255, 255, 255], - type='', - swap='face-15'), + dict(name='face-1', id=1, color=[255, 0, 0], type='', swap='face-15'), 2: - dict( - name='face-2', - id=2, - color=[255, 255, 255], - type='', - swap='face-14'), + dict(name='face-2', id=2, color=[255, 0, 0], type='', swap='face-14'), 3: - dict( - name='face-3', - id=3, - color=[255, 255, 255], - type='', - swap='face-13'), + dict(name='face-3', id=3, color=[255, 0, 0], type='', swap='face-13'), 4: - dict( - name='face-4', - id=4, - color=[255, 255, 255], - type='', - swap='face-12'), + dict(name='face-4', id=4, color=[255, 0, 0], type='', swap='face-12'), 5: - dict( - name='face-5', - id=5, - color=[255, 255, 255], - type='', - swap='face-11'), + dict(name='face-5', id=5, color=[255, 0, 0], type='', swap='face-11'), 6: - dict( - name='face-6', - id=6, - color=[255, 255, 255], - type='', - swap='face-10'), + dict(name='face-6', id=6, color=[255, 0, 0], type='', swap='face-10'), 7: - dict( - name='face-7', id=7, color=[255, 255, 255], type='', - swap='face-9'), - 8: - dict(name='face-8', id=8, color=[255, 255, 255], type='', swap=''), + dict(name='face-7', id=7, color=[255, 0, 0], type='', swap='face-9'), + 8: dict(name='face-8', id=8, color=[255, 0, 0], type='', swap=''), 9: - dict( - name='face-9', id=9, color=[255, 255, 255], type='', - swap='face-7'), + dict(name='face-9', id=9, color=[255, 0, 0], type='', swap='face-7'), 10: - dict( - name='face-10', - id=10, - color=[255, 255, 255], - type='', - swap='face-6'), + dict(name='face-10', id=10, color=[255, 0, 0], type='', swap='face-6'), 11: - dict( - name='face-11', - id=11, - color=[255, 255, 255], - type='', - swap='face-5'), + dict(name='face-11', id=11, color=[255, 0, 0], type='', swap='face-5'), 12: - dict( - name='face-12', - id=12, - color=[255, 255, 255], - type='', - swap='face-4'), + dict(name='face-12', id=12, color=[255, 0, 0], type='', swap='face-4'), 13: - dict( - name='face-13', - id=13, - color=[255, 255, 255], - type='', - 
swap='face-3'), + dict(name='face-13', id=13, color=[255, 0, 0], type='', swap='face-3'), 14: - dict( - name='face-14', - id=14, - color=[255, 255, 255], - type='', - swap='face-2'), + dict(name='face-14', id=14, color=[255, 0, 0], type='', swap='face-2'), 15: - dict( - name='face-15', - id=15, - color=[255, 255, 255], - type='', - swap='face-1'), + dict(name='face-15', id=15, color=[255, 0, 0], type='', swap='face-1'), 16: - dict( - name='face-16', - id=16, - color=[255, 255, 255], - type='', - swap='face-0'), - 17: - dict( - name='face-17', - id=17, - color=[255, 255, 255], - type='', - swap='face-26'), - 18: - dict( - name='face-18', - id=18, - color=[255, 255, 255], - type='', - swap='face-25'), - 19: - dict( - name='face-19', - id=19, - color=[255, 255, 255], - type='', - swap='face-24'), - 20: - dict( - name='face-20', - id=20, - color=[255, 255, 255], - type='', - swap='face-23'), - 21: - dict( - name='face-21', - id=21, - color=[255, 255, 255], - type='', - swap='face-22'), - 22: - dict( - name='face-22', - id=22, - color=[255, 255, 255], - type='', - swap='face-21'), - 23: - dict( - name='face-23', - id=23, - color=[255, 255, 255], - type='', - swap='face-20'), - 24: - dict( - name='face-24', - id=24, - color=[255, 255, 255], - type='', - swap='face-19'), - 25: - dict( - name='face-25', - id=25, - color=[255, 255, 255], - type='', - swap='face-18'), - 26: - dict( - name='face-26', - id=26, - color=[255, 255, 255], - type='', - swap='face-17'), - 27: - dict(name='face-27', id=27, color=[255, 255, 255], type='', swap=''), - 28: - dict(name='face-28', id=28, color=[255, 255, 255], type='', swap=''), - 29: - dict(name='face-29', id=29, color=[255, 255, 255], type='', swap=''), - 30: - dict(name='face-30', id=30, color=[255, 255, 255], type='', swap=''), - 31: - dict( - name='face-31', - id=31, - color=[255, 255, 255], - type='', - swap='face-35'), - 32: - dict( - name='face-32', - id=32, - color=[255, 255, 255], - type='', - swap='face-34'), - 33: - dict(name='face-33', id=33, color=[255, 255, 255], type='', swap=''), - 34: - dict( - name='face-34', - id=34, - color=[255, 255, 255], - type='', - swap='face-32'), - 35: - dict( - name='face-35', - id=35, - color=[255, 255, 255], - type='', - swap='face-31'), - 36: - dict( - name='face-36', - id=36, - color=[255, 255, 255], - type='', - swap='face-45'), - 37: - dict( - name='face-37', - id=37, - color=[255, 255, 255], - type='', - swap='face-44'), - 38: - dict( - name='face-38', - id=38, - color=[255, 255, 255], - type='', - swap='face-43'), - 39: - dict( - name='face-39', - id=39, - color=[255, 255, 255], - type='', - swap='face-42'), - 40: - dict( - name='face-40', - id=40, - color=[255, 255, 255], - type='', - swap='face-47'), - 41: - dict( - name='face-41', - id=41, - color=[255, 255, 255], - type='', - swap='face-46'), - 42: - dict( - name='face-42', - id=42, - color=[255, 255, 255], - type='', - swap='face-39'), - 43: - dict( - name='face-43', - id=43, - color=[255, 255, 255], - type='', - swap='face-38'), - 44: - dict( - name='face-44', - id=44, - color=[255, 255, 255], - type='', - swap='face-37'), - 45: - dict( - name='face-45', - id=45, - color=[255, 255, 255], - type='', - swap='face-36'), - 46: - dict( - name='face-46', - id=46, - color=[255, 255, 255], - type='', - swap='face-41'), - 47: - dict( - name='face-47', - id=47, - color=[255, 255, 255], - type='', - swap='face-40'), - 48: - dict( - name='face-48', - id=48, - color=[255, 255, 255], - type='', - swap='face-54'), - 49: - dict( - name='face-49', - id=49, - color=[255, 
255, 255], - type='', - swap='face-53'), - 50: - dict( - name='face-50', - id=50, - color=[255, 255, 255], - type='', - swap='face-52'), - 51: - dict(name='face-51', id=52, color=[255, 255, 255], type='', swap=''), - 52: - dict( - name='face-52', - id=52, - color=[255, 255, 255], - type='', - swap='face-50'), - 53: - dict( - name='face-53', - id=53, - color=[255, 255, 255], - type='', - swap='face-49'), - 54: - dict( - name='face-54', - id=54, - color=[255, 255, 255], - type='', - swap='face-48'), - 55: - dict( - name='face-55', - id=55, - color=[255, 255, 255], - type='', - swap='face-59'), - 56: - dict( - name='face-56', - id=56, - color=[255, 255, 255], - type='', - swap='face-58'), - 57: - dict(name='face-57', id=57, color=[255, 255, 255], type='', swap=''), - 58: - dict( - name='face-58', - id=58, - color=[255, 255, 255], - type='', - swap='face-56'), - 59: - dict( - name='face-59', - id=59, - color=[255, 255, 255], - type='', - swap='face-55'), - 60: - dict( - name='face-60', - id=60, - color=[255, 255, 255], - type='', - swap='face-64'), - 61: - dict( - name='face-61', - id=61, - color=[255, 255, 255], - type='', - swap='face-63'), - 62: - dict(name='face-62', id=62, color=[255, 255, 255], type='', swap=''), - 63: - dict( - name='face-63', - id=63, - color=[255, 255, 255], - type='', - swap='face-61'), - 64: - dict( - name='face-64', - id=64, - color=[255, 255, 255], - type='', - swap='face-60'), - 65: - dict( - name='face-65', - id=65, - color=[255, 255, 255], - type='', - swap='face-67'), - 66: - dict(name='face-66', id=66, color=[255, 255, 255], type='', swap=''), - 67: - dict( - name='face-67', - id=67, - color=[255, 255, 255], - type='', - swap='face-65') + dict(name='face-16', id=16, color=[255, 0, 0], type='', swap='face-0'), + 17: dict( + name='face-17', id=17, color=[255, 0, 0], type='', swap='face-26'), + 18: dict( + name='face-18', id=18, color=[255, 0, 0], type='', swap='face-25'), + 19: dict( + name='face-19', id=19, color=[255, 0, 0], type='', swap='face-24'), + 20: dict( + name='face-20', id=20, color=[255, 0, 0], type='', swap='face-23'), + 21: dict( + name='face-21', id=21, color=[255, 0, 0], type='', swap='face-22'), + 22: dict( + name='face-22', id=22, color=[255, 0, 0], type='', swap='face-21'), + 23: dict( + name='face-23', id=23, color=[255, 0, 0], type='', swap='face-20'), + 24: dict( + name='face-24', id=24, color=[255, 0, 0], type='', swap='face-19'), + 25: dict( + name='face-25', id=25, color=[255, 0, 0], type='', swap='face-18'), + 26: dict( + name='face-26', id=26, color=[255, 0, 0], type='', swap='face-17'), + 27: dict(name='face-27', id=27, color=[255, 0, 0], type='', swap=''), + 28: dict(name='face-28', id=28, color=[255, 0, 0], type='', swap=''), + 29: dict(name='face-29', id=29, color=[255, 0, 0], type='', swap=''), + 30: dict(name='face-30', id=30, color=[255, 0, 0], type='', swap=''), + 31: dict( + name='face-31', id=31, color=[255, 0, 0], type='', swap='face-35'), + 32: dict( + name='face-32', id=32, color=[255, 0, 0], type='', swap='face-34'), + 33: dict(name='face-33', id=33, color=[255, 0, 0], type='', swap=''), + 34: dict( + name='face-34', id=34, color=[255, 0, 0], type='', swap='face-32'), + 35: dict( + name='face-35', id=35, color=[255, 0, 0], type='', swap='face-31'), + 36: dict( + name='face-36', id=36, color=[255, 0, 0], type='', swap='face-45'), + 37: dict( + name='face-37', id=37, color=[255, 0, 0], type='', swap='face-44'), + 38: dict( + name='face-38', id=38, color=[255, 0, 0], type='', swap='face-43'), + 39: dict( + name='face-39', 
id=39, color=[255, 0, 0], type='', swap='face-42'), + 40: dict( + name='face-40', id=40, color=[255, 0, 0], type='', swap='face-47'), + 41: dict( + name='face-41', id=41, color=[255, 0, 0], type='', swap='face-46'), + 42: dict( + name='face-42', id=42, color=[255, 0, 0], type='', swap='face-39'), + 43: dict( + name='face-43', id=43, color=[255, 0, 0], type='', swap='face-38'), + 44: dict( + name='face-44', id=44, color=[255, 0, 0], type='', swap='face-37'), + 45: dict( + name='face-45', id=45, color=[255, 0, 0], type='', swap='face-36'), + 46: dict( + name='face-46', id=46, color=[255, 0, 0], type='', swap='face-41'), + 47: dict( + name='face-47', id=47, color=[255, 0, 0], type='', swap='face-40'), + 48: dict( + name='face-48', id=48, color=[255, 0, 0], type='', swap='face-54'), + 49: dict( + name='face-49', id=49, color=[255, 0, 0], type='', swap='face-53'), + 50: dict( + name='face-50', id=50, color=[255, 0, 0], type='', swap='face-52'), + 51: dict(name='face-51', id=52, color=[255, 0, 0], type='', swap=''), + 52: dict( + name='face-52', id=52, color=[255, 0, 0], type='', swap='face-50'), + 53: dict( + name='face-53', id=53, color=[255, 0, 0], type='', swap='face-49'), + 54: dict( + name='face-54', id=54, color=[255, 0, 0], type='', swap='face-48'), + 55: dict( + name='face-55', id=55, color=[255, 0, 0], type='', swap='face-59'), + 56: dict( + name='face-56', id=56, color=[255, 0, 0], type='', swap='face-58'), + 57: dict(name='face-57', id=57, color=[255, 0, 0], type='', swap=''), + 58: dict( + name='face-58', id=58, color=[255, 0, 0], type='', swap='face-56'), + 59: dict( + name='face-59', id=59, color=[255, 0, 0], type='', swap='face-55'), + 60: dict( + name='face-60', id=60, color=[255, 0, 0], type='', swap='face-64'), + 61: dict( + name='face-61', id=61, color=[255, 0, 0], type='', swap='face-63'), + 62: dict(name='face-62', id=62, color=[255, 0, 0], type='', swap=''), + 63: dict( + name='face-63', id=63, color=[255, 0, 0], type='', swap='face-61'), + 64: dict( + name='face-64', id=64, color=[255, 0, 0], type='', swap='face-60'), + 65: dict( + name='face-65', id=65, color=[255, 0, 0], type='', swap='face-67'), + 66: dict(name='face-66', id=66, color=[255, 0, 0], type='', swap=''), + 67: dict( + name='face-67', id=67, color=[255, 0, 0], type='', swap='face-65') }, skeleton_info={}, joint_weights=[1.] 
* 68, diff --git a/configs/_base_/datasets/cofw.py b/configs/_base_/datasets/cofw.py index 2fb7ad2f8d..d528bf2f2f 100644 --- a/configs/_base_/datasets/cofw.py +++ b/configs/_base_/datasets/cofw.py @@ -10,124 +10,47 @@ homepage='http://www.vision.caltech.edu/xpburgos/ICCV13/', ), keypoint_info={ - 0: - dict(name='kpt-0', id=0, color=[255, 255, 255], type='', swap='kpt-1'), - 1: - dict(name='kpt-1', id=1, color=[255, 255, 255], type='', swap='kpt-0'), - 2: - dict(name='kpt-2', id=2, color=[255, 255, 255], type='', swap='kpt-3'), - 3: - dict(name='kpt-3', id=3, color=[255, 255, 255], type='', swap='kpt-2'), - 4: - dict(name='kpt-4', id=4, color=[255, 255, 255], type='', swap='kpt-6'), - 5: - dict(name='kpt-5', id=5, color=[255, 255, 255], type='', swap='kpt-7'), - 6: - dict(name='kpt-6', id=6, color=[255, 255, 255], type='', swap='kpt-4'), - 7: - dict(name='kpt-7', id=7, color=[255, 255, 255], type='', swap='kpt-5'), - 8: - dict(name='kpt-8', id=8, color=[255, 255, 255], type='', swap='kpt-9'), - 9: - dict(name='kpt-9', id=9, color=[255, 255, 255], type='', swap='kpt-8'), + 0: dict(name='kpt-0', id=0, color=[255, 0, 0], type='', swap='kpt-1'), + 1: dict(name='kpt-1', id=1, color=[255, 0, 0], type='', swap='kpt-0'), + 2: dict(name='kpt-2', id=2, color=[255, 0, 0], type='', swap='kpt-3'), + 3: dict(name='kpt-3', id=3, color=[255, 0, 0], type='', swap='kpt-2'), + 4: dict(name='kpt-4', id=4, color=[255, 0, 0], type='', swap='kpt-6'), + 5: dict(name='kpt-5', id=5, color=[255, 0, 0], type='', swap='kpt-7'), + 6: dict(name='kpt-6', id=6, color=[255, 0, 0], type='', swap='kpt-4'), + 7: dict(name='kpt-7', id=7, color=[255, 0, 0], type='', swap='kpt-5'), + 8: dict(name='kpt-8', id=8, color=[255, 0, 0], type='', swap='kpt-9'), + 9: dict(name='kpt-9', id=9, color=[255, 0, 0], type='', swap='kpt-8'), 10: - dict( - name='kpt-10', - id=10, - color=[255, 255, 255], - type='', - swap='kpt-11'), + dict(name='kpt-10', id=10, color=[255, 0, 0], type='', swap='kpt-11'), 11: - dict( - name='kpt-11', - id=11, - color=[255, 255, 255], - type='', - swap='kpt-10'), + dict(name='kpt-11', id=11, color=[255, 0, 0], type='', swap='kpt-10'), 12: - dict( - name='kpt-12', - id=12, - color=[255, 255, 255], - type='', - swap='kpt-14'), + dict(name='kpt-12', id=12, color=[255, 0, 0], type='', swap='kpt-14'), 13: - dict( - name='kpt-13', - id=13, - color=[255, 255, 255], - type='', - swap='kpt-15'), + dict(name='kpt-13', id=13, color=[255, 0, 0], type='', swap='kpt-15'), 14: - dict( - name='kpt-14', - id=14, - color=[255, 255, 255], - type='', - swap='kpt-12'), + dict(name='kpt-14', id=14, color=[255, 0, 0], type='', swap='kpt-12'), 15: - dict( - name='kpt-15', - id=15, - color=[255, 255, 255], - type='', - swap='kpt-13'), + dict(name='kpt-15', id=15, color=[255, 0, 0], type='', swap='kpt-13'), 16: - dict( - name='kpt-16', - id=16, - color=[255, 255, 255], - type='', - swap='kpt-17'), + dict(name='kpt-16', id=16, color=[255, 0, 0], type='', swap='kpt-17'), 17: - dict( - name='kpt-17', - id=17, - color=[255, 255, 255], - type='', - swap='kpt-16'), + dict(name='kpt-17', id=17, color=[255, 0, 0], type='', swap='kpt-16'), 18: - dict( - name='kpt-18', - id=18, - color=[255, 255, 255], - type='', - swap='kpt-19'), + dict(name='kpt-18', id=18, color=[255, 0, 0], type='', swap='kpt-19'), 19: - dict( - name='kpt-19', - id=19, - color=[255, 255, 255], - type='', - swap='kpt-18'), - 20: - dict(name='kpt-20', id=20, color=[255, 255, 255], type='', swap=''), - 21: - dict(name='kpt-21', id=21, color=[255, 255, 255], type='', swap=''), + 
dict(name='kpt-19', id=19, color=[255, 0, 0], type='', swap='kpt-18'), + 20: dict(name='kpt-20', id=20, color=[255, 0, 0], type='', swap=''), + 21: dict(name='kpt-21', id=21, color=[255, 0, 0], type='', swap=''), 22: - dict( - name='kpt-22', - id=22, - color=[255, 255, 255], - type='', - swap='kpt-23'), + dict(name='kpt-22', id=22, color=[255, 0, 0], type='', swap='kpt-23'), 23: - dict( - name='kpt-23', - id=23, - color=[255, 255, 255], - type='', - swap='kpt-22'), - 24: - dict(name='kpt-24', id=24, color=[255, 255, 255], type='', swap=''), - 25: - dict(name='kpt-25', id=25, color=[255, 255, 255], type='', swap=''), - 26: - dict(name='kpt-26', id=26, color=[255, 255, 255], type='', swap=''), - 27: - dict(name='kpt-27', id=27, color=[255, 255, 255], type='', swap=''), - 28: - dict(name='kpt-28', id=28, color=[255, 255, 255], type='', swap='') + dict(name='kpt-23', id=23, color=[255, 0, 0], type='', swap='kpt-22'), + 24: dict(name='kpt-24', id=24, color=[255, 0, 0], type='', swap=''), + 25: dict(name='kpt-25', id=25, color=[255, 0, 0], type='', swap=''), + 26: dict(name='kpt-26', id=26, color=[255, 0, 0], type='', swap=''), + 27: dict(name='kpt-27', id=27, color=[255, 0, 0], type='', swap=''), + 28: dict(name='kpt-28', id=28, color=[255, 0, 0], type='', swap='') }, skeleton_info={}, joint_weights=[1.] * 29, diff --git a/configs/_base_/datasets/wflw.py b/configs/_base_/datasets/wflw.py index bed6f56f30..80c29b696c 100644 --- a/configs/_base_/datasets/wflw.py +++ b/configs/_base_/datasets/wflw.py @@ -10,572 +10,182 @@ homepage='https://wywu.github.io/projects/LAB/WFLW.html', ), keypoint_info={ - 0: - dict( - name='kpt-0', id=0, color=[255, 255, 255], type='', swap='kpt-32'), - 1: - dict( - name='kpt-1', id=1, color=[255, 255, 255], type='', swap='kpt-31'), - 2: - dict( - name='kpt-2', id=2, color=[255, 255, 255], type='', swap='kpt-30'), - 3: - dict( - name='kpt-3', id=3, color=[255, 255, 255], type='', swap='kpt-29'), - 4: - dict( - name='kpt-4', id=4, color=[255, 255, 255], type='', swap='kpt-28'), - 5: - dict( - name='kpt-5', id=5, color=[255, 255, 255], type='', swap='kpt-27'), - 6: - dict( - name='kpt-6', id=6, color=[255, 255, 255], type='', swap='kpt-26'), - 7: - dict( - name='kpt-7', id=7, color=[255, 255, 255], type='', swap='kpt-25'), - 8: - dict( - name='kpt-8', id=8, color=[255, 255, 255], type='', swap='kpt-24'), - 9: - dict( - name='kpt-9', id=9, color=[255, 255, 255], type='', swap='kpt-23'), + 0: dict(name='kpt-0', id=0, color=[255, 0, 0], type='', swap='kpt-32'), + 1: dict(name='kpt-1', id=1, color=[255, 0, 0], type='', swap='kpt-31'), + 2: dict(name='kpt-2', id=2, color=[255, 0, 0], type='', swap='kpt-30'), + 3: dict(name='kpt-3', id=3, color=[255, 0, 0], type='', swap='kpt-29'), + 4: dict(name='kpt-4', id=4, color=[255, 0, 0], type='', swap='kpt-28'), + 5: dict(name='kpt-5', id=5, color=[255, 0, 0], type='', swap='kpt-27'), + 6: dict(name='kpt-6', id=6, color=[255, 0, 0], type='', swap='kpt-26'), + 7: dict(name='kpt-7', id=7, color=[255, 0, 0], type='', swap='kpt-25'), + 8: dict(name='kpt-8', id=8, color=[255, 0, 0], type='', swap='kpt-24'), + 9: dict(name='kpt-9', id=9, color=[255, 0, 0], type='', swap='kpt-23'), 10: - dict( - name='kpt-10', - id=10, - color=[255, 255, 255], - type='', - swap='kpt-22'), + dict(name='kpt-10', id=10, color=[255, 0, 0], type='', swap='kpt-22'), 11: - dict( - name='kpt-11', - id=11, - color=[255, 255, 255], - type='', - swap='kpt-21'), + dict(name='kpt-11', id=11, color=[255, 0, 0], type='', swap='kpt-21'), 12: - dict( - name='kpt-12', - id=12, 
- color=[255, 255, 255], - type='', - swap='kpt-20'), + dict(name='kpt-12', id=12, color=[255, 0, 0], type='', swap='kpt-20'), 13: - dict( - name='kpt-13', - id=13, - color=[255, 255, 255], - type='', - swap='kpt-19'), + dict(name='kpt-13', id=13, color=[255, 0, 0], type='', swap='kpt-19'), 14: - dict( - name='kpt-14', - id=14, - color=[255, 255, 255], - type='', - swap='kpt-18'), + dict(name='kpt-14', id=14, color=[255, 0, 0], type='', swap='kpt-18'), 15: - dict( - name='kpt-15', - id=15, - color=[255, 255, 255], - type='', - swap='kpt-17'), - 16: - dict(name='kpt-16', id=16, color=[255, 255, 255], type='', swap=''), + dict(name='kpt-15', id=15, color=[255, 0, 0], type='', swap='kpt-17'), + 16: dict(name='kpt-16', id=16, color=[255, 0, 0], type='', swap=''), 17: - dict( - name='kpt-17', - id=17, - color=[255, 255, 255], - type='', - swap='kpt-15'), + dict(name='kpt-17', id=17, color=[255, 0, 0], type='', swap='kpt-15'), 18: - dict( - name='kpt-18', - id=18, - color=[255, 255, 255], - type='', - swap='kpt-14'), + dict(name='kpt-18', id=18, color=[255, 0, 0], type='', swap='kpt-14'), 19: - dict( - name='kpt-19', - id=19, - color=[255, 255, 255], - type='', - swap='kpt-13'), + dict(name='kpt-19', id=19, color=[255, 0, 0], type='', swap='kpt-13'), 20: - dict( - name='kpt-20', - id=20, - color=[255, 255, 255], - type='', - swap='kpt-12'), + dict(name='kpt-20', id=20, color=[255, 0, 0], type='', swap='kpt-12'), 21: - dict( - name='kpt-21', - id=21, - color=[255, 255, 255], - type='', - swap='kpt-11'), + dict(name='kpt-21', id=21, color=[255, 0, 0], type='', swap='kpt-11'), 22: - dict( - name='kpt-22', - id=22, - color=[255, 255, 255], - type='', - swap='kpt-10'), + dict(name='kpt-22', id=22, color=[255, 0, 0], type='', swap='kpt-10'), 23: - dict( - name='kpt-23', id=23, color=[255, 255, 255], type='', - swap='kpt-9'), + dict(name='kpt-23', id=23, color=[255, 0, 0], type='', swap='kpt-9'), 24: - dict( - name='kpt-24', id=24, color=[255, 255, 255], type='', - swap='kpt-8'), + dict(name='kpt-24', id=24, color=[255, 0, 0], type='', swap='kpt-8'), 25: - dict( - name='kpt-25', id=25, color=[255, 255, 255], type='', - swap='kpt-7'), + dict(name='kpt-25', id=25, color=[255, 0, 0], type='', swap='kpt-7'), 26: - dict( - name='kpt-26', id=26, color=[255, 255, 255], type='', - swap='kpt-6'), + dict(name='kpt-26', id=26, color=[255, 0, 0], type='', swap='kpt-6'), 27: - dict( - name='kpt-27', id=27, color=[255, 255, 255], type='', - swap='kpt-5'), + dict(name='kpt-27', id=27, color=[255, 0, 0], type='', swap='kpt-5'), 28: - dict( - name='kpt-28', id=28, color=[255, 255, 255], type='', - swap='kpt-4'), + dict(name='kpt-28', id=28, color=[255, 0, 0], type='', swap='kpt-4'), 29: - dict( - name='kpt-29', id=29, color=[255, 255, 255], type='', - swap='kpt-3'), + dict(name='kpt-29', id=29, color=[255, 0, 0], type='', swap='kpt-3'), 30: - dict( - name='kpt-30', id=30, color=[255, 255, 255], type='', - swap='kpt-2'), + dict(name='kpt-30', id=30, color=[255, 0, 0], type='', swap='kpt-2'), 31: - dict( - name='kpt-31', id=31, color=[255, 255, 255], type='', - swap='kpt-1'), + dict(name='kpt-31', id=31, color=[255, 0, 0], type='', swap='kpt-1'), 32: - dict( - name='kpt-32', id=32, color=[255, 255, 255], type='', - swap='kpt-0'), + dict(name='kpt-32', id=32, color=[255, 0, 0], type='', swap='kpt-0'), 33: - dict( - name='kpt-33', - id=33, - color=[255, 255, 255], - type='', - swap='kpt-46'), + dict(name='kpt-33', id=33, color=[255, 0, 0], type='', swap='kpt-46'), 34: - dict( - name='kpt-34', - id=34, - color=[255, 255, 
255], - type='', - swap='kpt-45'), + dict(name='kpt-34', id=34, color=[255, 0, 0], type='', swap='kpt-45'), 35: - dict( - name='kpt-35', - id=35, - color=[255, 255, 255], - type='', - swap='kpt-44'), + dict(name='kpt-35', id=35, color=[255, 0, 0], type='', swap='kpt-44'), 36: - dict( - name='kpt-36', - id=36, - color=[255, 255, 255], - type='', - swap='kpt-43'), - 37: - dict( - name='kpt-37', - id=37, - color=[255, 255, 255], - type='', - swap='kpt-42'), - 38: - dict( - name='kpt-38', - id=38, - color=[255, 255, 255], - type='', - swap='kpt-50'), - 39: - dict( - name='kpt-39', - id=39, - color=[255, 255, 255], - type='', - swap='kpt-49'), - 40: - dict( - name='kpt-40', - id=40, - color=[255, 255, 255], - type='', - swap='kpt-48'), - 41: - dict( - name='kpt-41', - id=41, - color=[255, 255, 255], - type='', - swap='kpt-47'), - 42: - dict( - name='kpt-42', - id=42, - color=[255, 255, 255], - type='', - swap='kpt-37'), - 43: - dict( - name='kpt-43', - id=43, - color=[255, 255, 255], - type='', - swap='kpt-36'), - 44: - dict( - name='kpt-44', - id=44, - color=[255, 255, 255], - type='', - swap='kpt-35'), - 45: - dict( - name='kpt-45', - id=45, - color=[255, 255, 255], - type='', - swap='kpt-34'), - 46: - dict( - name='kpt-46', - id=46, - color=[255, 255, 255], - type='', - swap='kpt-33'), - 47: - dict( - name='kpt-47', - id=47, - color=[255, 255, 255], - type='', - swap='kpt-41'), - 48: - dict( - name='kpt-48', - id=48, - color=[255, 255, 255], - type='', - swap='kpt-40'), - 49: - dict( - name='kpt-49', - id=49, - color=[255, 255, 255], - type='', - swap='kpt-39'), - 50: - dict( - name='kpt-50', - id=50, - color=[255, 255, 255], - type='', - swap='kpt-38'), - 51: - dict(name='kpt-51', id=51, color=[255, 255, 255], type='', swap=''), - 52: - dict(name='kpt-52', id=52, color=[255, 255, 255], type='', swap=''), - 53: - dict(name='kpt-53', id=53, color=[255, 255, 255], type='', swap=''), - 54: - dict(name='kpt-54', id=54, color=[255, 255, 255], type='', swap=''), - 55: - dict( - name='kpt-55', - id=55, - color=[255, 255, 255], - type='', - swap='kpt-59'), - 56: - dict( - name='kpt-56', - id=56, - color=[255, 255, 255], - type='', - swap='kpt-58'), - 57: - dict(name='kpt-57', id=57, color=[255, 255, 255], type='', swap=''), - 58: - dict( - name='kpt-58', - id=58, - color=[255, 255, 255], - type='', - swap='kpt-56'), - 59: - dict( - name='kpt-59', - id=59, - color=[255, 255, 255], - type='', - swap='kpt-55'), - 60: - dict( - name='kpt-60', - id=60, - color=[255, 255, 255], - type='', - swap='kpt-72'), - 61: - dict( - name='kpt-61', - id=61, - color=[255, 255, 255], - type='', - swap='kpt-71'), - 62: - dict( - name='kpt-62', - id=62, - color=[255, 255, 255], - type='', - swap='kpt-70'), - 63: - dict( - name='kpt-63', - id=63, - color=[255, 255, 255], - type='', - swap='kpt-69'), - 64: - dict( - name='kpt-64', - id=64, - color=[255, 255, 255], - type='', - swap='kpt-68'), - 65: - dict( - name='kpt-65', - id=65, - color=[255, 255, 255], - type='', - swap='kpt-75'), - 66: - dict( - name='kpt-66', - id=66, - color=[255, 255, 255], - type='', - swap='kpt-74'), - 67: - dict( - name='kpt-67', - id=67, - color=[255, 255, 255], - type='', - swap='kpt-73'), - 68: - dict( - name='kpt-68', - id=68, - color=[255, 255, 255], - type='', - swap='kpt-64'), - 69: - dict( - name='kpt-69', - id=69, - color=[255, 255, 255], - type='', - swap='kpt-63'), - 70: - dict( - name='kpt-70', - id=70, - color=[255, 255, 255], - type='', - swap='kpt-62'), - 71: - dict( - name='kpt-71', - id=71, - color=[255, 255, 255], - type='', - 
swap='kpt-61'), - 72: - dict( - name='kpt-72', - id=72, - color=[255, 255, 255], - type='', - swap='kpt-60'), - 73: - dict( - name='kpt-73', - id=73, - color=[255, 255, 255], - type='', - swap='kpt-67'), - 74: - dict( - name='kpt-74', - id=74, - color=[255, 255, 255], - type='', - swap='kpt-66'), - 75: - dict( - name='kpt-75', - id=75, - color=[255, 255, 255], - type='', - swap='kpt-65'), - 76: - dict( - name='kpt-76', - id=76, - color=[255, 255, 255], - type='', - swap='kpt-82'), - 77: - dict( - name='kpt-77', - id=77, - color=[255, 255, 255], - type='', - swap='kpt-81'), - 78: - dict( - name='kpt-78', - id=78, - color=[255, 255, 255], - type='', - swap='kpt-80'), - 79: - dict(name='kpt-79', id=79, color=[255, 255, 255], type='', swap=''), - 80: - dict( - name='kpt-80', - id=80, - color=[255, 255, 255], - type='', - swap='kpt-78'), - 81: - dict( - name='kpt-81', - id=81, - color=[255, 255, 255], - type='', - swap='kpt-77'), - 82: - dict( - name='kpt-82', - id=82, - color=[255, 255, 255], - type='', - swap='kpt-76'), - 83: - dict( - name='kpt-83', - id=83, - color=[255, 255, 255], - type='', - swap='kpt-87'), - 84: - dict( - name='kpt-84', - id=84, - color=[255, 255, 255], - type='', - swap='kpt-86'), - 85: - dict(name='kpt-85', id=85, color=[255, 255, 255], type='', swap=''), - 86: - dict( - name='kpt-86', - id=86, - color=[255, 255, 255], - type='', - swap='kpt-84'), - 87: - dict( - name='kpt-87', - id=87, - color=[255, 255, 255], - type='', - swap='kpt-83'), - 88: - dict( - name='kpt-88', - id=88, - color=[255, 255, 255], - type='', - swap='kpt-92'), - 89: - dict( - name='kpt-89', - id=89, - color=[255, 255, 255], - type='', - swap='kpt-91'), - 90: - dict(name='kpt-90', id=90, color=[255, 255, 255], type='', swap=''), - 91: - dict( - name='kpt-91', - id=91, - color=[255, 255, 255], - type='', - swap='kpt-89'), - 92: - dict( - name='kpt-92', - id=92, - color=[255, 255, 255], - type='', - swap='kpt-88'), - 93: - dict( - name='kpt-93', - id=93, - color=[255, 255, 255], - type='', - swap='kpt-95'), - 94: - dict(name='kpt-94', id=94, color=[255, 255, 255], type='', swap=''), - 95: - dict( - name='kpt-95', - id=95, - color=[255, 255, 255], - type='', - swap='kpt-93'), - 96: - dict( - name='kpt-96', - id=96, - color=[255, 255, 255], - type='', - swap='kpt-97'), - 97: - dict( - name='kpt-97', - id=97, - color=[255, 255, 255], - type='', - swap='kpt-96') + dict(name='kpt-36', id=36, color=[255, 0, 0], type='', swap='kpt-43'), + 37: dict( + name='kpt-37', id=37, color=[255, 0, 0], type='', swap='kpt-42'), + 38: dict( + name='kpt-38', id=38, color=[255, 0, 0], type='', swap='kpt-50'), + 39: dict( + name='kpt-39', id=39, color=[255, 0, 0], type='', swap='kpt-49'), + 40: dict( + name='kpt-40', id=40, color=[255, 0, 0], type='', swap='kpt-48'), + 41: dict( + name='kpt-41', id=41, color=[255, 0, 0], type='', swap='kpt-47'), + 42: dict( + name='kpt-42', id=42, color=[255, 0, 0], type='', swap='kpt-37'), + 43: dict( + name='kpt-43', id=43, color=[255, 0, 0], type='', swap='kpt-36'), + 44: dict( + name='kpt-44', id=44, color=[255, 0, 0], type='', swap='kpt-35'), + 45: dict( + name='kpt-45', id=45, color=[255, 0, 0], type='', swap='kpt-34'), + 46: dict( + name='kpt-46', id=46, color=[255, 0, 0], type='', swap='kpt-33'), + 47: dict( + name='kpt-47', id=47, color=[255, 0, 0], type='', swap='kpt-41'), + 48: dict( + name='kpt-48', id=48, color=[255, 0, 0], type='', swap='kpt-40'), + 49: dict( + name='kpt-49', id=49, color=[255, 0, 0], type='', swap='kpt-39'), + 50: dict( + name='kpt-50', id=50, color=[255, 0, 
0], type='', swap='kpt-38'), + 51: dict(name='kpt-51', id=51, color=[255, 0, 0], type='', swap=''), + 52: dict(name='kpt-52', id=52, color=[255, 0, 0], type='', swap=''), + 53: dict(name='kpt-53', id=53, color=[255, 0, 0], type='', swap=''), + 54: dict(name='kpt-54', id=54, color=[255, 0, 0], type='', swap=''), + 55: dict( + name='kpt-55', id=55, color=[255, 0, 0], type='', swap='kpt-59'), + 56: dict( + name='kpt-56', id=56, color=[255, 0, 0], type='', swap='kpt-58'), + 57: dict(name='kpt-57', id=57, color=[255, 0, 0], type='', swap=''), + 58: dict( + name='kpt-58', id=58, color=[255, 0, 0], type='', swap='kpt-56'), + 59: dict( + name='kpt-59', id=59, color=[255, 0, 0], type='', swap='kpt-55'), + 60: dict( + name='kpt-60', id=60, color=[255, 0, 0], type='', swap='kpt-72'), + 61: dict( + name='kpt-61', id=61, color=[255, 0, 0], type='', swap='kpt-71'), + 62: dict( + name='kpt-62', id=62, color=[255, 0, 0], type='', swap='kpt-70'), + 63: dict( + name='kpt-63', id=63, color=[255, 0, 0], type='', swap='kpt-69'), + 64: dict( + name='kpt-64', id=64, color=[255, 0, 0], type='', swap='kpt-68'), + 65: dict( + name='kpt-65', id=65, color=[255, 0, 0], type='', swap='kpt-75'), + 66: dict( + name='kpt-66', id=66, color=[255, 0, 0], type='', swap='kpt-74'), + 67: dict( + name='kpt-67', id=67, color=[255, 0, 0], type='', swap='kpt-73'), + 68: dict( + name='kpt-68', id=68, color=[255, 0, 0], type='', swap='kpt-64'), + 69: dict( + name='kpt-69', id=69, color=[255, 0, 0], type='', swap='kpt-63'), + 70: dict( + name='kpt-70', id=70, color=[255, 0, 0], type='', swap='kpt-62'), + 71: dict( + name='kpt-71', id=71, color=[255, 0, 0], type='', swap='kpt-61'), + 72: dict( + name='kpt-72', id=72, color=[255, 0, 0], type='', swap='kpt-60'), + 73: dict( + name='kpt-73', id=73, color=[255, 0, 0], type='', swap='kpt-67'), + 74: dict( + name='kpt-74', id=74, color=[255, 0, 0], type='', swap='kpt-66'), + 75: dict( + name='kpt-75', id=75, color=[255, 0, 0], type='', swap='kpt-65'), + 76: dict( + name='kpt-76', id=76, color=[255, 0, 0], type='', swap='kpt-82'), + 77: dict( + name='kpt-77', id=77, color=[255, 0, 0], type='', swap='kpt-81'), + 78: dict( + name='kpt-78', id=78, color=[255, 0, 0], type='', swap='kpt-80'), + 79: dict(name='kpt-79', id=79, color=[255, 0, 0], type='', swap=''), + 80: dict( + name='kpt-80', id=80, color=[255, 0, 0], type='', swap='kpt-78'), + 81: dict( + name='kpt-81', id=81, color=[255, 0, 0], type='', swap='kpt-77'), + 82: dict( + name='kpt-82', id=82, color=[255, 0, 0], type='', swap='kpt-76'), + 83: dict( + name='kpt-83', id=83, color=[255, 0, 0], type='', swap='kpt-87'), + 84: dict( + name='kpt-84', id=84, color=[255, 0, 0], type='', swap='kpt-86'), + 85: dict(name='kpt-85', id=85, color=[255, 0, 0], type='', swap=''), + 86: dict( + name='kpt-86', id=86, color=[255, 0, 0], type='', swap='kpt-84'), + 87: dict( + name='kpt-87', id=87, color=[255, 0, 0], type='', swap='kpt-83'), + 88: dict( + name='kpt-88', id=88, color=[255, 0, 0], type='', swap='kpt-92'), + 89: dict( + name='kpt-89', id=89, color=[255, 0, 0], type='', swap='kpt-91'), + 90: dict(name='kpt-90', id=90, color=[255, 0, 0], type='', swap=''), + 91: dict( + name='kpt-91', id=91, color=[255, 0, 0], type='', swap='kpt-89'), + 92: dict( + name='kpt-92', id=92, color=[255, 0, 0], type='', swap='kpt-88'), + 93: dict( + name='kpt-93', id=93, color=[255, 0, 0], type='', swap='kpt-95'), + 94: dict(name='kpt-94', id=94, color=[255, 0, 0], type='', swap=''), + 95: dict( + name='kpt-95', id=95, color=[255, 0, 0], type='', swap='kpt-93'), + 96: 
dict(
+        name='kpt-96', id=96, color=[255, 0, 0], type='', swap='kpt-97'),
+    97: dict(
+        name='kpt-97', id=97, color=[255, 0, 0], type='', swap='kpt-96')
     },
     skeleton_info={},
     joint_weights=[1.] * 98,

From 9f003540fa45772116d3bee38225cbb3d1b276fd Mon Sep 17 00:00:00 2001
From: lupeng
Date: Wed, 22 Feb 2023 14:33:28 +0800
Subject: [PATCH 16/17] update demo results

---
 demo/docs/2d_face_demo.md | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/demo/docs/2d_face_demo.md b/demo/docs/2d_face_demo.md
index e1fce81eb8..f248077821 100644
--- a/demo/docs/2d_face_demo.md
+++ b/demo/docs/2d_face_demo.md
@@ -31,7 +31,7 @@ python demo/topdown_demo_with_mmdet.py \
 Visualization result:
-
+
 If you use a heatmap-based model and set argument `--draw-heatmap`, the predicted heatmap will be visualized together with the keypoints.
@@ -66,16 +66,10 @@ python demo/topdown_demo_with_mmdet.py \
 Videos share the same interface with images. The difference is that the `${INPUT_PATH}` for videos can be the local path or **URL** link to video file.
 ```shell
-python demo/topdown_demo_with_mmdet.py \
-    demo/mmdetection_cfg/yolox-s_8xb8-300e_coco-face.py \
-    https://download.openmmlab.com/mmpose/mmdet_pretrained/yolo-x_8xb8-300e_coco-face_13274d7c.pth \
-    configs/face_2d_keypoint/topdown_heatmap/aflw/td-hm_hrnetv2-w18_8xb64-60e_aflw-256x256.py \
-    https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_aflw_256x256-f2bbc62b_20210125.pth \
-    --input demo/resources/ \
-    --show --draw-heatmap --output-root vis_results
+python demo/topdown_demo_with_mmdet.py demo/mmdetection_cfg/yolox-s_8xb8-300e_coco-face.py https://download.openmmlab.com/mmpose/mmdet_pretrained/yolo-x_8xb8-300e_coco-face_13274d7c.pth configs/face_2d_keypoint/topdown_heatmap/aflw/td-hm_hrnetv2-w18_8xb64-60e_aflw-256x256.py https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_aflw_256x256-f2bbc62b_20210125.pth --input data/demo/demo_face.mp4 --draw-heatmap --output-root vis_results --device cpu
 ```
+
 The original video can be downloaded from [Google Drive](https://drive.google.com/file/d/1kQt80t6w802b_vgVcmiV_QfcSJ3RWzmb/view?usp=sharing).

From d80851c2f48a6547727dc7714a6ca91498518747 Mon Sep 17 00:00:00 2001
From: lupeng
Date: Wed, 22 Feb 2023 14:43:22 +0800
Subject: [PATCH 17/17] fix video demo command

---
 demo/docs/2d_face_demo.md | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/demo/docs/2d_face_demo.md b/demo/docs/2d_face_demo.md
index f248077821..09b8ceb330 100644
--- a/demo/docs/2d_face_demo.md
+++ b/demo/docs/2d_face_demo.md
@@ -66,7 +66,13 @@ python demo/topdown_demo_with_mmdet.py \
 Videos share the same interface with images. The difference is that the `${INPUT_PATH}` for videos can be the local path or **URL** link to video file.
 ```shell
-python demo/topdown_demo_with_mmdet.py demo/mmdetection_cfg/yolox-s_8xb8-300e_coco-face.py https://download.openmmlab.com/mmpose/mmdet_pretrained/yolo-x_8xb8-300e_coco-face_13274d7c.pth configs/face_2d_keypoint/topdown_heatmap/aflw/td-hm_hrnetv2-w18_8xb64-60e_aflw-256x256.py https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_aflw_256x256-f2bbc62b_20210125.pth --input data/demo/demo_face.mp4 --draw-heatmap --output-root vis_results --device cpu
+python demo/topdown_demo_with_mmdet.py \
+    demo/mmdetection_cfg/yolox-s_8xb8-300e_coco-face.py \
+    https://download.openmmlab.com/mmpose/mmdet_pretrained/yolo-x_8xb8-300e_coco-face_13274d7c.pth \
+    configs/face_2d_keypoint/topdown_heatmap/aflw/td-hm_hrnetv2-w18_8xb64-60e_aflw-256x256.py \
+    https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_aflw_256x256-f2bbc62b_20210125.pth \
+    --input demo/resources/ \
+    --show --draw-heatmap --output-root vis_results
 ```
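A note for reviewers on the dataset metainfo format touched in the hunks above: every `keypoint_info` entry follows the same schema (`name`, `id`, `color`, `type`, `swap`), and the `swap` field names the left/right counterpart used for horizontal-flip augmentation, which is why the reformatted entries must keep it intact. The sketch below shows how flip pairs can be derived from that field; the three-entry `keypoint_info` excerpt and the `flip_pairs` helper are illustrative only and not part of this patch (MMPose computes an equivalent mapping itself when parsing the metainfo).

```python
# Minimal, self-contained sketch (not part of the patch): derive symmetric
# flip pairs from the `swap` fields of a `keypoint_info` dict shaped like
# the ones in configs/_base_/datasets/*.py. Names below are hypothetical.
keypoint_info = {
    0: dict(name='kpt-0', id=0, color=[255, 0, 0], type='', swap='kpt-1'),
    1: dict(name='kpt-1', id=1, color=[255, 0, 0], type='', swap='kpt-0'),
    2: dict(name='kpt-2', id=2, color=[255, 0, 0], type='', swap=''),
}


def flip_pairs(info):
    """Return each symmetric keypoint pair (id, swapped_id) exactly once."""
    name_to_id = {v['name']: k for k, v in info.items()}
    pairs = []
    for idx, kpt in info.items():
        swap = kpt['swap']
        # Only emit the pair once, starting from the lower index.
        if swap and name_to_id[swap] > idx:
            pairs.append((idx, name_to_id[swap]))
    return pairs


print(flip_pairs(keypoint_info))  # -> [(0, 1)]
```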