diff --git a/README.md b/README.md index 951c4adf2e..c14110ffd3 100644 --- a/README.md +++ b/README.md @@ -284,6 +284,7 @@ A summary can be found in the [Model Zoo](https://mmpose.readthedocs.io/en/lates - [x] [InterHand2.6M](https://mmpose.readthedocs.io/en/latest/model_zoo_papers/datasets.html#interhand2-6m-eccv-2020) \[[homepage](https://mks0601.github.io/InterHand2.6M/)\] (ECCV'2020) - [x] [AP-10K](https://mmpose.readthedocs.io/en/latest/model_zoo_papers/datasets.html#ap-10k-neurips-2021) \[[homepage](https://github.com/AlexTheBad/AP-10K)\] (NeurIPS'2021) - [x] [Horse-10](https://mmpose.readthedocs.io/en/latest/model_zoo_papers/datasets.html#horse-10-wacv-2021) \[[homepage](http://www.mackenziemathislab.org/horse10)\] (WACV'2021) +- [x] [Human-Art](#todo) \[[homepage](https://idea-research.github.io/HumanArt/)\] (CVPR'2023) diff --git a/README_CN.md b/README_CN.md index 49a956cab9..f3a6af62f9 100644 --- a/README_CN.md +++ b/README_CN.md @@ -282,6 +282,7 @@ MMPose v1.0.0 是一个重大更新,包括了大量的 API 和配置文件的 - [x] [InterHand2.6M](https://mmpose.readthedocs.io/zh_CN/latest/model_zoo_papers/datasets.html#interhand2-6m-eccv-2020) \[[主页](https://mks0601.github.io/InterHand2.6M/)\] (ECCV'2020) - [x] [AP-10K](https://mmpose.readthedocs.io/en/latest/model_zoo_papers/datasets.html#ap-10k-neurips-2021) \[[主页](https://github.com/AlexTheBad/AP-10K)\] (NeurIPS'2021) - [x] [Horse-10](https://mmpose.readthedocs.io/zh_CN/latest/model_zoo_papers/datasets.html#horse-10-wacv-2021) \[[主页](http://www.mackenziemathislab.org/horse10)\] (WACV'2021) +- [x] [Human-Art](#todo) \[[主页](https://idea-research.github.io/HumanArt/)\] (CVPR'2023) diff --git a/configs/_base_/datasets/humanart.py b/configs/_base_/datasets/humanart.py new file mode 100644 index 0000000000..b549269b69 --- /dev/null +++ b/configs/_base_/datasets/humanart.py @@ -0,0 +1,181 @@ +dataset_info = dict( + dataset_name='Human-Art', + paper_info=dict( + author='Ju, Xuan and Zeng, Ailing and ' + 'Wang, Jianan and Xu, Qiang and Zhang, Lei', + title='Human-Art: A Versatile Human-Centric Dataset ' + 'Bridging Natural and Artificial Scenes', + container='Proceedings of the IEEE/CVF Conference on ' + 'Computer Vision and Pattern Recognition', + year='2023', + homepage='https://idea-research.github.io/HumanArt/', + ), + keypoint_info={ + 0: + dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''), + 1: + dict( + name='left_eye', + id=1, + color=[51, 153, 255], + type='upper', + swap='right_eye'), + 2: + dict( + name='right_eye', + id=2, + color=[51, 153, 255], + type='upper', + swap='left_eye'), + 3: + dict( + name='left_ear', + id=3, + color=[51, 153, 255], + type='upper', + swap='right_ear'), + 4: + dict( + name='right_ear', + id=4, + color=[51, 153, 255], + type='upper', + swap='left_ear'), + 5: + dict( + name='left_shoulder', + id=5, + color=[0, 255, 0], + type='upper', + swap='right_shoulder'), + 6: + dict( + name='right_shoulder', + id=6, + color=[255, 128, 0], + type='upper', + swap='left_shoulder'), + 7: + dict( + name='left_elbow', + id=7, + color=[0, 255, 0], + type='upper', + swap='right_elbow'), + 8: + dict( + name='right_elbow', + id=8, + color=[255, 128, 0], + type='upper', + swap='left_elbow'), + 9: + dict( + name='left_wrist', + id=9, + color=[0, 255, 0], + type='upper', + swap='right_wrist'), + 10: + dict( + name='right_wrist', + id=10, + color=[255, 128, 0], + type='upper', + swap='left_wrist'), + 11: + dict( + name='left_hip', + id=11, + color=[0, 255, 0], + type='lower', + swap='right_hip'), +
12: + dict( + name='right_hip', + id=12, + color=[255, 128, 0], + type='lower', + swap='left_hip'), + 13: + dict( + name='left_knee', + id=13, + color=[0, 255, 0], + type='lower', + swap='right_knee'), + 14: + dict( + name='right_knee', + id=14, + color=[255, 128, 0], + type='lower', + swap='left_knee'), + 15: + dict( + name='left_ankle', + id=15, + color=[0, 255, 0], + type='lower', + swap='right_ankle'), + 16: + dict( + name='right_ankle', + id=16, + color=[255, 128, 0], + type='lower', + swap='left_ankle') + }, + skeleton_info={ + 0: + dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]), + 1: + dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]), + 2: + dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]), + 3: + dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]), + 4: + dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]), + 5: + dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]), + 6: + dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]), + 7: + dict( + link=('left_shoulder', 'right_shoulder'), + id=7, + color=[51, 153, 255]), + 8: + dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]), + 9: + dict( + link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]), + 10: + dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]), + 11: + dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]), + 12: + dict(link=('left_eye', 'right_eye'), id=12, color=[51, 153, 255]), + 13: + dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]), + 14: + dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]), + 15: + dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]), + 16: + dict(link=('right_eye', 'right_ear'), id=16, color=[51, 153, 255]), + 17: + dict(link=('left_ear', 'left_shoulder'), id=17, color=[51, 153, 255]), + 18: + dict( + link=('right_ear', 'right_shoulder'), id=18, color=[51, 153, 255]) + }, + joint_weights=[ + 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5, + 1.5 + ], + sigmas=[ + 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062, + 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089 + ]) diff --git a/configs/_base_/datasets/humanart_aic.py b/configs/_base_/datasets/humanart_aic.py new file mode 100644 index 0000000000..e999427536 --- /dev/null +++ b/configs/_base_/datasets/humanart_aic.py @@ -0,0 +1,205 @@ +dataset_info = dict( + dataset_name='humanart', + paper_info=[ + dict( + author='Ju, Xuan and Zeng, Ailing and ' + 'Wang, Jianan and Xu, Qiang and Zhang, ' + 'Lei', + title='Human-Art: A Versatile Human-Centric Dataset ' + 'Bridging Natural and Artificial Scenes', + container='CVPR', + year='2023', + homepage='https://idea-research.github.io/HumanArt/', + ), + dict( + author='Wu, Jiahong and Zheng, He and Zhao, Bo and ' + 'Li, Yixin and Yan, Baoming and Liang, Rui and ' + 'Wang, Wenjia and Zhou, Shipei and Lin, Guosen and ' + 'Fu, Yanwei and others', + title='Ai challenger: A large-scale dataset for going ' + 'deeper in image understanding', + container='arXiv', + year='2017', + homepage='https://github.com/AIChallenger/AI_Challenger_2017', + ), + ], + keypoint_info={ + 0: + dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''), + 1: + dict( + name='left_eye', + id=1, + color=[51, 153, 255], + type='upper', + swap='right_eye'), + 2: + dict( + name='right_eye', + id=2, + color=[51, 153, 255], + type='upper', + swap='left_eye'), + 3: + dict( + 
name='left_ear', + id=3, + color=[51, 153, 255], + type='upper', + swap='right_ear'), + 4: + dict( + name='right_ear', + id=4, + color=[51, 153, 255], + type='upper', + swap='left_ear'), + 5: + dict( + name='left_shoulder', + id=5, + color=[0, 255, 0], + type='upper', + swap='right_shoulder'), + 6: + dict( + name='right_shoulder', + id=6, + color=[255, 128, 0], + type='upper', + swap='left_shoulder'), + 7: + dict( + name='left_elbow', + id=7, + color=[0, 255, 0], + type='upper', + swap='right_elbow'), + 8: + dict( + name='right_elbow', + id=8, + color=[255, 128, 0], + type='upper', + swap='left_elbow'), + 9: + dict( + name='left_wrist', + id=9, + color=[0, 255, 0], + type='upper', + swap='right_wrist'), + 10: + dict( + name='right_wrist', + id=10, + color=[255, 128, 0], + type='upper', + swap='left_wrist'), + 11: + dict( + name='left_hip', + id=11, + color=[0, 255, 0], + type='lower', + swap='right_hip'), + 12: + dict( + name='right_hip', + id=12, + color=[255, 128, 0], + type='lower', + swap='left_hip'), + 13: + dict( + name='left_knee', + id=13, + color=[0, 255, 0], + type='lower', + swap='right_knee'), + 14: + dict( + name='right_knee', + id=14, + color=[255, 128, 0], + type='lower', + swap='left_knee'), + 15: + dict( + name='left_ankle', + id=15, + color=[0, 255, 0], + type='lower', + swap='right_ankle'), + 16: + dict( + name='right_ankle', + id=16, + color=[255, 128, 0], + type='lower', + swap='left_ankle'), + 17: + dict( + name='head_top', + id=17, + color=[51, 153, 255], + type='upper', + swap=''), + 18: + dict(name='neck', id=18, color=[51, 153, 255], type='upper', swap='') + }, + skeleton_info={ + 0: + dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]), + 1: + dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]), + 2: + dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]), + 3: + dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]), + 4: + dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]), + 5: + dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]), + 6: + dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]), + 7: + dict( + link=('left_shoulder', 'right_shoulder'), + id=7, + color=[51, 153, 255]), + 8: + dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]), + 9: + dict( + link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]), + 10: + dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]), + 11: + dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]), + 12: + dict(link=('left_eye', 'right_eye'), id=12, color=[51, 153, 255]), + 13: + dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]), + 14: + dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]), + 15: + dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]), + 16: + dict(link=('right_eye', 'right_ear'), id=16, color=[51, 153, 255]), + 17: + dict(link=('left_ear', 'left_shoulder'), id=17, color=[51, 153, 255]), + 18: + dict( + link=('right_ear', 'right_shoulder'), id=18, color=[51, 153, 255]), + 19: + dict(link=('head_top', 'neck'), id=19, color=[51, 153, 255]), + }, + joint_weights=[ + 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5, + 1.5, 1.5, 1.5 + ], + sigmas=[ + 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062, + 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089, 0.026, 0.026 + ]) diff --git a/configs/body_2d_keypoint/rtmpose/README.md b/configs/body_2d_keypoint/rtmpose/README.md index 3037974917..38fd938376
100644 --- a/configs/body_2d_keypoint/rtmpose/README.md +++ b/configs/body_2d_keypoint/rtmpose/README.md @@ -37,3 +37,21 @@ Results on CrowdPose test with [YOLOv3](https://github.com/eriklindernoren/PyTor | Model | Input Size | AP | AR | Details and Download | | :-------: | :--------: | :---: | :---: | :------------------------------------------------------: | | RTMPose-m | 256x192 | 0.706 | 0.788 | [rtmpose_crowdpose.md](./crowdpose/rtmpose_crowdpose.md) | + +### Human-Art Dataset + +Results on Human-Art validation dataset with detector having human AP of 56.2 on Human-Art validation dataset + +| Model | Input Size | AP | AR | Details and Download | +| :-------: | :--------: | :---: | :---: | :---------------------------------------------------: | +| RTMPose-s | 256x192 | 0.311 | 0.381 | [rtmpose_humanart.md](./humanart/rtmpose_humanart.md) | +| RTMPose-m | 256x192 | 0.355 | 0.417 | [rtmpose_humanart.md](./humanart/rtmpose_humanart.md) | +| RTMPose-l | 256x192 | 0.378 | 0.442 | [rtmpose_humanart.md](./humanart/rtmpose_humanart.md) | + +Results on Human-Art validation dataset with ground-truth bounding-box + +| Model | Input Size | AP | AR | Details and Download | +| :-------: | :--------: | :---: | :---: | :---------------------------------------------------: | +| RTMPose-s | 256x192 | 0.698 | 0.732 | [rtmpose_humanart.md](./humanart/rtmpose_humanart.md) | +| RTMPose-m | 256x192 | 0.728 | 0.759 | [rtmpose_humanart.md](./humanart/rtmpose_humanart.md) | +| RTMPose-l | 256x192 | 0.753 | 0.783 | [rtmpose_humanart.md](./humanart/rtmpose_humanart.md) | diff --git a/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-l_8xb256-420e_humanart-256x192.py b/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-l_8xb256-420e_humanart-256x192.py new file mode 100644 index 0000000000..384a712d95 --- /dev/null +++ b/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-l_8xb256-420e_humanart-256x192.py @@ -0,0 +1,232 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 420 +stage2_num_epochs = 30 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 210 to 420 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=1024) + +# codec settings +codec = dict( + type='SimCCLabel', + input_size=(192, 256), + sigma=(4.9, 5.66), + simcc_split_ratio=2.0, + normalize=False, + use_dark=False) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=1., + widen_factor=1., + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + 
checkpoint='https://download.openmmlab.com/mmpose/v1/projects/' + 'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth' # noqa + )), + head=dict( + type='RTMCCHead', + in_channels=1024, + out_channels=17, + input_size=codec['input_size'], + in_featuremap_size=(6, 8), + simcc_split_ratio=codec['simcc_split_ratio'], + final_layer_kernel_size=7, + gau_cfg=dict( + hidden_dims=256, + s=128, + expansion_factor=2, + dropout_rate=0., + drop_path=0., + act_fn='SiLU', + use_rel_bias=False, + pos_enc=False), + loss=dict( + type='KLDiscretLoss', + use_target_weight=True, + beta=10., + label_softmax=True), + decoder=codec), + test_cfg=dict(flip_test=True)) + +# base dataset settings +dataset_type = 'HumanArtDataset' +data_mode = 'topdown' +data_root = 'data/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/detection/coco/', +# f'{data_root}': 's3://openmmlab/datasets/detection/coco/' +# })) + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=256, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='HumanArt/annotations/training_humanart_coco.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='HumanArt/annotations/validation_humanart.json', + # bbox_file=f'{data_root}HumanArt/person_detection_results/' + # 'HumanArt_validation_detections_AP_H_56_person.json', + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, 
+ )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'HumanArt/annotations/validation_humanart.json') +test_evaluator = val_evaluator diff --git a/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-m_8xb256-420e_humanart-256x192.py b/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-m_8xb256-420e_humanart-256x192.py new file mode 100644 index 0000000000..30178cbb6d --- /dev/null +++ b/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-m_8xb256-420e_humanart-256x192.py @@ -0,0 +1,232 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 420 +stage2_num_epochs = 30 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 210 to 420 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=1024) + +# codec settings +codec = dict( + type='SimCCLabel', + input_size=(192, 256), + sigma=(4.9, 5.66), + simcc_split_ratio=2.0, + normalize=False, + use_dark=False) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=0.67, + widen_factor=0.75, + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/v1/projects/' + 'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth' # noqa + )), + head=dict( + type='RTMCCHead', + in_channels=768, + out_channels=17, + input_size=codec['input_size'], + in_featuremap_size=(6, 8), + simcc_split_ratio=codec['simcc_split_ratio'], + final_layer_kernel_size=7, + gau_cfg=dict( + hidden_dims=256, + s=128, + expansion_factor=2, + dropout_rate=0., + drop_path=0., + act_fn='SiLU', + use_rel_bias=False, + pos_enc=False), + loss=dict( + type='KLDiscretLoss', + use_target_weight=True, + beta=10., + label_softmax=True), + decoder=codec), + test_cfg=dict(flip_test=True)) + +# base dataset settings +dataset_type = 'HumanArtDataset' +data_mode = 'topdown' +data_root = 'data/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/detection/coco/', +# f'{data_root}': 's3://openmmlab/datasets/detection/coco/' +# })) + +# pipelines +train_pipeline = [ + 
dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=256, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='HumanArt/annotations/training_humanart_coco.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='HumanArt/annotations/validation_humanart.json', + # bbox_file=f'{data_root}HumanArt/person_detection_results/' + # 'HumanArt_validation_detections_AP_H_56_person.json', + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'HumanArt/annotations/validation_humanart.json') +test_evaluator = val_evaluator diff --git a/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-s_8xb256-420e_humanart-256x192.py b/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-s_8xb256-420e_humanart-256x192.py new file mode 100644 index 0000000000..b4263f25e7 --- /dev/null +++ b/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-s_8xb256-420e_humanart-256x192.py @@ -0,0 +1,232 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 420 +stage2_num_epochs = 30 +base_lr 
= 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 210 to 420 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=1024) + +# codec settings +codec = dict( + type='SimCCLabel', + input_size=(192, 256), + sigma=(4.9, 5.66), + simcc_split_ratio=2.0, + normalize=False, + use_dark=False) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=0.33, + widen_factor=0.5, + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/v1/projects/' + 'rtmpose/cspnext-s_udp-aic-coco_210e-256x192-92f5a029_20230130.pth' # noqa + )), + head=dict( + type='RTMCCHead', + in_channels=512, + out_channels=17, + input_size=codec['input_size'], + in_featuremap_size=(6, 8), + simcc_split_ratio=codec['simcc_split_ratio'], + final_layer_kernel_size=7, + gau_cfg=dict( + hidden_dims=256, + s=128, + expansion_factor=2, + dropout_rate=0., + drop_path=0., + act_fn='SiLU', + use_rel_bias=False, + pos_enc=False), + loss=dict( + type='KLDiscretLoss', + use_target_weight=True, + beta=10., + label_softmax=True), + decoder=codec), + test_cfg=dict(flip_test=True)) + +# base dataset settings +dataset_type = 'HumanArtDataset' +data_mode = 'topdown' +data_root = 'data/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/detection/coco/', +# f'{data_root}': 's3://openmmlab/datasets/detection/coco/' +# })) + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + 
dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=256, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='HumanArt/annotations/training_humanart_coco.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='HumanArt/annotations/validation_humanart.json', + # bbox_file=f'{data_root}HumanArt/person_detection_results/' + # 'HumanArt_validation_detections_AP_H_56_person.json', + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'HumanArt/annotations/validation_humanart.json') +test_evaluator = val_evaluator diff --git a/configs/body_2d_keypoint/rtmpose/humanart/rtmpose_humanart.md b/configs/body_2d_keypoint/rtmpose/humanart/rtmpose_humanart.md new file mode 100644 index 0000000000..bfd925b2c8 --- /dev/null +++ b/configs/body_2d_keypoint/rtmpose/humanart/rtmpose_humanart.md @@ -0,0 +1,110 @@ + + +
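+A minimal sketch of running one of the Human-Art checkpoints listed below through MMPose's high-level Python API (the image path `demo.jpg` is a placeholder; the config and checkpoint are the RTMPose-m entries from the tables):
+
+```python
+from mmpose.apis import inference_topdown, init_model
+
+config = ('configs/body_2d_keypoint/rtmpose/humanart/'
+          'rtmpose-m_8xb256-420e_humanart-256x192.py')
+checkpoint = ('https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/'
+              'rtmpose-m_8xb256-420e_humanart-256x192-8430627b_20230611.pth')
+
+model = init_model(config, checkpoint, device='cpu')
+
+# With no explicit bboxes, the whole image is used as a single person box;
+# in practice, pass detector outputs for multi-person images.
+results = inference_topdown(model, 'demo.jpg')
+print(results[0].pred_instances.keypoints.shape)  # e.g. (1, 17, 2)
+```
+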
+RTMPose (arXiv'2023) + +```bibtex +@misc{https://doi.org/10.48550/arxiv.2303.07399, + doi = {10.48550/ARXIV.2303.07399}, + url = {https://arxiv.org/abs/2303.07399}, + author = {Jiang, Tao and Lu, Peng and Zhang, Li and Ma, Ningsheng and Han, Rui and Lyu, Chengqi and Li, Yining and Chen, Kai}, + keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences}, + title = {RTMPose: Real-Time Multi-Person Pose Estimation based on MMPose}, + publisher = {arXiv}, + year = {2023}, + copyright = {Creative Commons Attribution 4.0 International} +} + +``` + +
+ + + +
+RTMDet (arXiv'2022) + +```bibtex +@misc{lyu2022rtmdet, + title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors}, + author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen}, + year={2022}, + eprint={2212.07784}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +
+ + + +
+COCO (ECCV'2014) + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +
+ +
+Human-Art (CVPR'2023) + +```bibtex +@inproceedings{ju2023humanart, + title={Human-Art: A Versatile Human-Centric Dataset Bridging Natural and Artificial Scenes}, + author={Ju, Xuan and Zeng, Ailing and Jianan, Wang and Qiang, Xu and Lei, Zhang}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), + year={2023}} +``` + +
+ +Results on Human-Art validation dataset with detector having human AP of 56.2 on Human-Art validation dataset + +| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [rtmpose-s-coco](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_coco-256x192.py) | 256x192 | 0.199 | 0.328 | 0.198 | 0.261 | 0.418 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_simcc-coco_pt-aic-coco_420e-256x192-8edcf0d7_20230127.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_simcc-coco_pt-aic-coco_420e-256x192-8edcf0d7_20230127.json) | +| [rtmpose-s-humanart-coco](/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-s_8xb256-420e_humanart-256x192.py) | 256x192 | 0.311 | 0.462 | 0.323 | 0.381 | 0.540 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_8xb256-420e_humanart-256x192-5a3ac943_20230611.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_8xb256-420e_humanart-256x192-5a3ac943_20230611.json) | +| [rtmpose-m-coco](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_coco-256x192.py) | 256x192 | 0.239 | 0.372 | 0.243 | 0.302 | 0.455 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco_pt-aic-coco_420e-256x192-d8dd5ca4_20230127.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco_pt-aic-coco_420e-256x192-d8dd5ca4_20230127.json) | +| [rtmpose-m-humanart-coco](/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-m_8xb256-420e_humanart-256x192.py) | 256x192 | 0.355 | 0.503 | 0.377 | 0.417 | 0.568 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_8xb256-420e_humanart-256x192-8430627b_20230611.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_8xb256-420e_humanart-256x192-8430627b_20230611.json) | +| [rtmpose-l-coco](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_coco-256x192.py) | 256x192 | 0.260 | 0.393 | 0.267 | 0.323 | 0.472 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco_pt-aic-coco_420e-256x192-1352a4d2_20230127.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco_pt-aic-coco_420e-256x192-1352a4d2_20230127.json) | +| [rtmpose-l-humanart-coco](/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-l_8xb256-420e_humanart-256x192.py) | 256x192 | 0.378 | 0.521 | 0.399 | 0.442 | 0.584 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_8xb256-420e_humanart-256x192-389f2cb0_20230611.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_8xb256-420e_humanart-256x192-389f2cb0_20230611.json) | + +Results on Human-Art validation dataset with ground-truth bounding-box + +| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [rtmpose-s-coco](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_coco-256x192.py) | 256x192 | 0.480 | 0.739 | 0.498 | 0.521 | 0.763 | 
[ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_simcc-coco_pt-aic-coco_420e-256x192-8edcf0d7_20230127.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_simcc-coco_pt-aic-coco_420e-256x192-8edcf0d7_20230127.json) | +| [rtmpose-s-humanart-coco](/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-s_8xb256-420e_humanart-256x192.py) | 256x192 | 0.698 | 0.893 | 0.768 | 0.732 | 0.903 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_8xb256-420e_humanart-256x192-5a3ac943_20230611.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_8xb256-420e_humanart-256x192-5a3ac943_20230611.json) | +| [rtmpose-m-coco](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_coco-256x192.py) | 256x192 | 0.532 | 0.765 | 0.563 | 0.571 | 0.789 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco_pt-aic-coco_420e-256x192-d8dd5ca4_20230127.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco_pt-aic-coco_420e-256x192-d8dd5ca4_20230127.json) | +| [rtmpose-m-humanart-coco](/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-m_8xb256-420e_humanart-256x192.py) | 256x192 | 0.728 | 0.895 | 0.791 | 0.759 | 0.906 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_8xb256-420e_humanart-256x192-8430627b_20230611.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_8xb256-420e_humanart-256x192-8430627b_20230611.json) | +| [rtmpose-l-coco](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_coco-256x192.py) | 256x192 | 0.564 | 0.789 | 0.602 | 0.599 | 0.808 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco_pt-aic-coco_420e-256x192-1352a4d2_20230127.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco_pt-aic-coco_420e-256x192-1352a4d2_20230127.json) | +| [rtmpose-l-humanart-coco](/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-l_8xb256-420e_humanart-256x192.py) | 256x192 | 0.753 | 0.905 | 0.812 | 0.783 | 0.915 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_8xb256-420e_humanart-256x192-389f2cb0_20230611.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_8xb256-420e_humanart-256x192-389f2cb0_20230611.json) | + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [rtmpose-s-coco](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_coco-256x192.py) | 256x192 | 0.716 | 0.892 | 0.789 | 0.768 | 0.929 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_simcc-coco_pt-aic-coco_420e-256x192-8edcf0d7_20230127.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_simcc-coco_pt-aic-coco_420e-256x192-8edcf0d7_20230127.json) | +| [rtmpose-s-humanart-coco](/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-s_8xb256-420e_humanart-256x192.py) | 256x192 | 0.706 | 0.888 | 0.780 | 0.759 | 0.928 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_8xb256-420e_humanart-256x192-5a3ac943_20230611.pth) | 
[log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_8xb256-420e_humanart-256x192-5a3ac943_20230611.json) | +| [rtmpose-m-coco](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_coco-256x192.py) | 256x192 | 0.746 | 0.899 | 0.817 | 0.795 | 0.935 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco_pt-aic-coco_420e-256x192-d8dd5ca4_20230127.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco_pt-aic-coco_420e-256x192-d8dd5ca4_20230127.json) | +| [rtmpose-m-humanart-coco](/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-m_8xb256-420e_humanart-256x192.py) | 256x192 | 0.725 | 0.892 | 0.795 | 0.775 | 0.929 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_8xb256-420e_humanart-256x192-8430627b_20230611.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_8xb256-420e_humanart-256x192-8430627b_20230611.json) | +| [rtmpose-l-coco](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_coco-256x192.py) | 256x192 | 0.758 | 0.906 | 0.826 | 0.806 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco_pt-aic-coco_420e-256x192-1352a4d2_20230127.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco_pt-aic-coco_420e-256x192-1352a4d2_20230127.json) | +| [rtmpose-l-humanart-coco](/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-l_8xb256-420e_humanart-256x192.py) | 256x192 | 0.748 | 0.901 | 0.816 | 0.796 | 0.938 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_8xb256-420e_humanart-256x192-389f2cb0_20230611.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_8xb256-420e_humanart-256x192-389f2cb0_20230611.json) | + +Results on COCO val2017 with ground-truth bounding box + +| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [rtmpose-s-humanart-coco](/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-s_8xb256-420e_humanart-256x192.py) | 256x192 | 0.725 | 0.916 | 0.798 | 0.753 | 0.925 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_8xb256-420e_humanart-256x192-5a3ac943_20230611.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_8xb256-420e_humanart-256x192-5a3ac943_20230611.json) | +| [rtmpose-m-humanart-coco](/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-m_8xb256-420e_humanart-256x192.py) | 256x192 | 0.744 | 0.916 | 0.818 | 0.770 | 0.930 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_8xb256-420e_humanart-256x192-8430627b_20230611.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_8xb256-420e_humanart-256x192-8430627b_20230611.json) | +| [rtmpose-l-humanart-coco](/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-l_8xb256-420e_humanart-256x192.py) | 256x192 | 0.770 | 0.927 | 0.840 | 0.794 | 0.939 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_8xb256-420e_humanart-256x192-389f2cb0_20230611.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_8xb256-420e_humanart-256x192-389f2cb0_20230611.json) | diff --git 
a/configs/body_2d_keypoint/rtmpose/humanart/rtmpose_humanart.yml b/configs/body_2d_keypoint/rtmpose/humanart/rtmpose_humanart.yml new file mode 100644 index 0000000000..f0f21b2d6f --- /dev/null +++ b/configs/body_2d_keypoint/rtmpose/humanart/rtmpose_humanart.yml @@ -0,0 +1,106 @@ +Collections: +- Name: RTMPose + Paper: + Title: "RTMPose: Real-Time Multi-Person Pose Estimation based on MMPose" + URL: https://arxiv.org/abs/2303.07399 + README: https://github.com/open-mmlab/mmpose/blob/main/projects/rtmpose/README.md +Models: +- Config: configs/body_2d_keypoint/rtmpose/humanart/rtmpose-l_8xb256-420e_humanart-256x192.py + In Collection: RTMPose + Metadata: + Architecture: &id001 + - RTMPose + Training Data: &id002 + - COCO + - Human-Art + Name: rtmpose-l_8xb256-420e_humanart-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.748 + AP@0.5: 0.901 + AP@0.75: 0.816 + AR: 0.796 + AR@0.5: 0.938 + Task: Body 2D Keypoint + - Dataset: Human-Art + Metrics: + AP: 0.378 + AP@0.5: 0.521 + AP@0.75: 0.399 + AR: 0.442 + AR@0.5: 0.584 + Task: Body 2D Keypoint + - Dataset: Human-Art(GT) + Metrics: + AP: 0.753 + AP@0.5: 0.905 + AP@0.75: 0.812 + AR: 0.783 + AR@0.5: 0.915 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_8xb256-420e_humanart-256x192-389f2cb0_20230611.pth +- Config: configs/body_2d_keypoint/rtmpose/humanart/rtmpose-m_8xb256-420e_humanart-256x192.py + In Collection: RTMPose + Metadata: + Architecture: *id001 + Training Data: *id002 + Name: rtmpose-m_8xb256-420e_humanart-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.725 + AP@0.5: 0.892 + AP@0.75: 0.795 + AR: 0.775 + AR@0.5: 0.929 + Task: Body 2D Keypoint + - Dataset: Human-Art + Metrics: + AP: 0.355 + AP@0.5: 0.503 + AP@0.75: 0.377 + AR: 0.417 + AR@0.5: 0.568 + Task: Body 2D Keypoint + - Dataset: Human-Art(GT) + Metrics: + AP: 0.728 + AP@0.5: 0.895 + AP@0.75: 0.791 + AR: 0.759 + AR@0.5: 0.906 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_8xb256-420e_humanart-256x192-8430627b_20230611.pth +- Config: configs/body_2d_keypoint/rtmpose/humanart/rtmpose-s_8xb256-420e_humanart-256x192.py + In Collection: RTMPose + Metadata: + Architecture: *id001 + Training Data: *id002 + Name: rtmpose-s_8xb256-420e_humanart-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.706 + AP@0.5: 0.888 + AP@0.75: 0.780 + AR: 0.759 + AR@0.5: 0.928 + Task: Body 2D Keypoint + - Dataset: Human-Art + Metrics: + AP: 0.311 + AP@0.5: 0.462 + AP@0.75: 0.323 + AR: 0.381 + AR@0.5: 0.540 + Task: Body 2D Keypoint + - Dataset: Human-Art(GT) + Metrics: + AP: 0.698 + AP@0.5: 0.893 + AP@0.75: 0.768 + AR: 0.732 + AR@0.5: 0.903 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_8xb256-420e_humanart-256x192-5a3ac943_20230611.pth diff --git a/configs/body_2d_keypoint/topdown_heatmap/README.md b/configs/body_2d_keypoint/topdown_heatmap/README.md index 9e23b874bc..47aae219e4 100644 --- a/configs/body_2d_keypoint/topdown_heatmap/README.md +++ b/configs/body_2d_keypoint/topdown_heatmap/README.md @@ -115,3 +115,19 @@ Results on PoseTrack2018 val with ground-truth bounding boxes. 
| HRNet-w48 | 256x192 | 84.6 | [hrnet_posetrack18.md](./posetrack18/hrnet_posetrack18.md) | | HRNet-w32 | 256x192 | 83.4 | [hrnet_posetrack18.md](./posetrack18/hrnet_posetrack18.md) | | ResNet-50 | 256x192 | 81.2 | [resnet_posetrack18.md](./posetrack18/resnet_posetrack18.md) | + +### Human-Art Dataset + +Results on Human-Art validation dataset with detector having human AP of 56.2 on Human-Art validation dataset + +| Model | Input Size | AP | AR | Details and Download | +| :-------: | :--------: | :---: | :---: | :---------------------------------------------------: | +| ViTPose-s | 256x192 | 0.381 | 0.448 | [vitpose_humanart.md](./humanart/vitpose_humanart.md) | +| ViTPose-b | 256x192 | 0.410 | 0.475 | [vitpose_humanart.md](./humanart/vitpose_humanart.md) | + +Results on Human-Art validation dataset with ground-truth bounding-box + +| Model | Input Size | AP | AR | Details and Download | +| :-------: | :--------: | :---: | :---: | :---------------------------------------------------: | +| ViTPose-s | 256x192 | 0.738 | 0.768 | [vitpose_humanart.md](./humanart/vitpose_humanart.md) | +| ViTPose-b | 256x192 | 0.759 | 0.790 | [vitpose_humanart.md](./humanart/vitpose_humanart.md) | diff --git a/configs/body_2d_keypoint/topdown_heatmap/humanart/td-hm_ViTPose-base_8xb64-210e_humanart-256x192.py b/configs/body_2d_keypoint/topdown_heatmap/humanart/td-hm_ViTPose-base_8xb64-210e_humanart-256x192.py new file mode 100644 index 0000000000..6f08f404fb --- /dev/null +++ b/configs/body_2d_keypoint/topdown_heatmap/humanart/td-hm_ViTPose-base_8xb64-210e_humanart-256x192.py @@ -0,0 +1,150 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +custom_imports = dict( + imports=['mmpose.engine.optim_wrappers.layer_decay_optim_wrapper'], + allow_failed_imports=False) + +optim_wrapper = dict( + optimizer=dict( + type='AdamW', lr=5e-4, betas=(0.9, 0.999), weight_decay=0.1), + paramwise_cfg=dict( + num_layers=12, + layer_decay_rate=0.75, + custom_keys={ + 'bias': dict(decay_multi=0.0), + 'pos_embed': dict(decay_mult=0.0), + 'relative_position_bias_table': dict(decay_mult=0.0), + 'norm': dict(decay_mult=0.0), + }, + ), + constructor='LayerDecayOptimWrapperConstructor', + clip_grad=dict(max_norm=1., norm_type=2), +) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) + +# codec settings +codec = dict( + type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='mmcls.VisionTransformer', + arch='base', + img_size=(256, 192), + patch_size=16, + qkv_bias=True, + drop_path_rate=0.3, + with_cls_token=False, + output_cls_token=False, + patch_cfg=dict(padding=2), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'v1/pretrained_models/mae_pretrain_vit_base.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=768, + out_channels=17, + 
deconv_out_channels=(256, 256), + deconv_kernel_sizes=(4, 4), + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=False, + )) + +# base dataset settings +data_root = 'data/' +dataset_type = 'HumanArtDataset' +data_mode = 'topdown' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='HumanArt/annotations/training_humanart_coco.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='HumanArt/annotations/validation_humanart.json', + bbox_file=f'{data_root}HumanArt/person_detection_results/' + 'HumanArt_validation_detections_AP_H_56_person.json', + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'HumanArt/annotations/validation_humanart.json') +test_evaluator = val_evaluator diff --git a/configs/body_2d_keypoint/topdown_heatmap/humanart/td-hm_ViTPose-small_8xb64-210e_humanart-256x192.py b/configs/body_2d_keypoint/topdown_heatmap/humanart/td-hm_ViTPose-small_8xb64-210e_humanart-256x192.py new file mode 100644 index 0000000000..6daf87cc90 --- /dev/null +++ b/configs/body_2d_keypoint/topdown_heatmap/humanart/td-hm_ViTPose-small_8xb64-210e_humanart-256x192.py @@ -0,0 +1,155 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +custom_imports = dict( + imports=['mmpose.engine.optim_wrappers.layer_decay_optim_wrapper'], + allow_failed_imports=False) + +optim_wrapper = dict( + optimizer=dict( + type='AdamW', lr=5e-4, betas=(0.9, 0.999), weight_decay=0.1), + paramwise_cfg=dict( + num_layers=12, + layer_decay_rate=0.8, + custom_keys={ + 'bias': dict(decay_multi=0.0), + 'pos_embed': dict(decay_mult=0.0), + 'relative_position_bias_table': dict(decay_mult=0.0), + 'norm': dict(decay_mult=0.0), + }, + ), + constructor='LayerDecayOptimWrapperConstructor', + clip_grad=dict(max_norm=1., norm_type=2), +) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) + +# codec 
settings +codec = dict( + type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='mmcls.VisionTransformer', + arch={ + 'embed_dims': 384, + 'num_layers': 12, + 'num_heads': 12, + 'feedforward_channels': 384 * 4 + }, + img_size=(256, 192), + patch_size=16, + qkv_bias=True, + drop_path_rate=0.1, + with_cls_token=False, + output_cls_token=False, + patch_cfg=dict(padding=2), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'v1/pretrained_models/mae_pretrain_vit_small.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=384, + out_channels=17, + deconv_out_channels=(256, 256), + deconv_kernel_sizes=(4, 4), + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=False, + )) + +# base dataset settings +data_root = 'data/' +dataset_type = 'HumanArtDataset' +data_mode = 'topdown' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='HumanArt/annotations/training_humanart_coco.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='HumanArt/annotations/validation_humanart.json', + bbox_file=f'{data_root}HumanArt/person_detection_results/' + 'HumanArt_validation_detections_AP_H_56_person.json', + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'HumanArt/annotations/validation_humanart.json') +test_evaluator = val_evaluator diff --git a/configs/body_2d_keypoint/topdown_heatmap/humanart/vitpose_humanart.md b/configs/body_2d_keypoint/topdown_heatmap/humanart/vitpose_humanart.md new file mode 100644 index 0000000000..1e559aa4da --- /dev/null +++ b/configs/body_2d_keypoint/topdown_heatmap/humanart/vitpose_humanart.md @@ -0,0 +1,85 @@ +To utilize ViTPose, you'll need to have [MMClassification](https://github.com/open-mmlab/mmclassification). To install the required version, run the following command: + +```shell +mim install 'mmcls>=1.0.0rc5' +``` + + + +
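+A quick sanity check that the `mmcls` dependency resolves — a minimal sketch that only builds the model without loading weights, since instantiating the config already pulls `mmcls.VisionTransformer` through the cross-library registry:
+
+```python
+from mmpose.apis import init_model
+
+# Config added alongside this doc; building it fails early if mmcls is
+# missing or too old to provide the VisionTransformer backbone.
+config = ('configs/body_2d_keypoint/topdown_heatmap/humanart/'
+          'td-hm_ViTPose-small_8xb64-210e_humanart-256x192.py')
+model = init_model(config, checkpoint=None, device='cpu')
+print(type(model.backbone).__name__)  # VisionTransformer
+```
+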
+ +ViTPose (NeurIPS'2022) + +```bibtex +@inproceedings{ + xu2022vitpose, + title={Vi{TP}ose: Simple Vision Transformer Baselines for Human Pose Estimation}, + author={Yufei Xu and Jing Zhang and Qiming Zhang and Dacheng Tao}, + booktitle={Advances in Neural Information Processing Systems}, + year={2022}, +} +``` + +
+ + + +
+COCO (ECCV'2014) + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +
+ +
+Human-Art (CVPR'2023) + +```bibtex +@inproceedings{ju2023humanart, + title={Human-Art: A Versatile Human-Centric Dataset Bridging Natural and Artificial Scenes}, + author={Ju, Xuan and Zeng, Ailing and Wang, Jianan and Xu, Qiang and Zhang, Lei}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, + year={2023} +} +``` + +
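Any checkpoint in the tables below can be tried out with MMPose's high-level inferencer. The following is a sketch rather than part of this PR: the config path and weight URL are copied from the ViTPose-S-humanart-coco row, and `demo.jpg` stands for any local image.

```python
# Run a Human-Art-trained ViTPose-S checkpoint on a single image.
from mmpose.apis import MMPoseInferencer

inferencer = MMPoseInferencer(
    pose2d='configs/body_2d_keypoint/topdown_heatmap/humanart/'
    'td-hm_ViTPose-small_8xb64-210e_humanart-256x192.py',
    pose2d_weights='https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/'
    'topdown_heatmap/human_art/'
    'td-hm_ViTPose-small_8xb64-210e_humanart-256x192-5cbe2bfc_20230611.pth')

# The inferencer yields one result dict per input image.
result = next(inferencer('demo.jpg'))
```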
+ +Results on Human-Art validation dataset with detector having human AP of 56.2 on Human-Art validation dataset + +> With classic decoder + +| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [ViTPose-S-coco](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small_8xb64-210e_coco-256x192.py) | 256x192 | 0.228 | 0.371 | 0.229 | 0.298 | 0.467 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small_8xb64-210e_coco-256x192-62d7a712_20230314.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small_8xb64-210e_coco-256x192-62d7a712_20230314.json) | +| [ViTPose-S-humanart-coco](/configs/body_2d_keypoint/topdown_heatmap/humanart/td-hm_ViTPose-small_8xb64-210e_humanart-256x192.py) | 256x192 | 0.381 | 0.532 | 0.405 | 0.448 | 0.602 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/human_art/td-hm_ViTPose-small_8xb64-210e_humanart-256x192-5cbe2bfc_20230611.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/human_art/td-hm_ViTPose-small_8xb64-210e_humanart-256x192-5cbe2bfc_20230611.json) | +| [ViTPose-B-coco](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base_8xb64-210e_coco-256x192.py) | 256x192 | 0.270 | 0.423 | 0.272 | 0.340 | 0.510 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base_8xb64-210e_coco-256x192-216eae50_20230314.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base_8xb64-210e_coco-256x192-216eae50_20230314.json) | +| [ViTPose-B-humanart-coco](/configs/body_2d_keypoint/topdown_heatmap/humanart/td-hm_ViTPose-base_8xb64-210e_humanart-256x192.py) | 256x192 | 0.410 | 0.549 | 0.434 | 0.475 | 0.615 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/human_art/td-hm_ViTPose-base_8xb64-210e_humanart-256x192-b417f546_20230611.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/human_art/td-hm_ViTPose-base_8xb64-210e_humanart-256x192-b417f546_20230611.json) | + +Results on Human-Art validation dataset with ground-truth bounding-box + +> With classic decoder + +| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [ViTPose-S-coco](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small_8xb64-210e_coco-256x192.py) | 256x192 | 0.507 | 0.758 | 0.531 | 0.551 | 0.780 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small_8xb64-210e_coco-256x192-62d7a712_20230314.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small_8xb64-210e_coco-256x192-62d7a712_20230314.json) | +| [ViTPose-S-humanart-coco](/configs/body_2d_keypoint/topdown_heatmap/humanart/td-hm_ViTPose-small_8xb64-210e_humanart-256x192.py) | 256x192 | 0.738 | 0.905 | 0.802 | 0.768 | 0.911 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/human_art/td-hm_ViTPose-small_8xb64-210e_humanart-256x192-5cbe2bfc_20230611.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/human_art/td-hm_ViTPose-small_8xb64-210e_humanart-256x192-5cbe2bfc_20230611.json) | +| [ViTPose-B-coco](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base_8xb64-210e_coco-256x192.py) | 256x192 | 0.555 | 0.782 | 0.590 | 0.599 | 0.809 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base_8xb64-210e_coco-256x192-216eae50_20230314.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base_8xb64-210e_coco-256x192-216eae50_20230314.json) | +| [ViTPose-B-humanart-coco](/configs/body_2d_keypoint/topdown_heatmap/humanart/td-hm_ViTPose-base_8xb64-210e_humanart-256x192.py) | 256x192 | 0.759 | 0.905 | 0.823 | 0.790 | 0.917 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/human_art/td-hm_ViTPose-base_8xb64-210e_humanart-256x192-b417f546_20230611.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/human_art/td-hm_ViTPose-base_8xb64-210e_humanart-256x192-b417f546_20230611.json) | + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +> With classic decoder + +| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [ViTPose-S-coco](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small_8xb64-210e_coco-256x192.py) | 256x192 | 0.739 | 0.903 | 0.816 | 0.792 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small_8xb64-210e_coco-256x192-62d7a712_20230314.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small_8xb64-210e_coco-256x192-62d7a712_20230314.json) | +| [ViTPose-S-humanart-coco](/configs/body_2d_keypoint/topdown_heatmap/humanart/td-hm_ViTPose-small_8xb64-210e_humanart-256x192.py) | 256x192 | 0.737 | 0.902 | 0.811 | 0.792 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/human_art/td-hm_ViTPose-small_8xb64-210e_humanart-256x192-5cbe2bfc_20230611.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/human_art/td-hm_ViTPose-small_8xb64-210e_humanart-256x192-5cbe2bfc_20230611.json) | +| [ViTPose-B-coco](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base_8xb64-210e_coco-256x192.py) | 256x192 | 0.757 | 0.905 | 0.829 | 0.810 | 0.946 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base_8xb64-210e_coco-256x192-216eae50_20230314.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base_8xb64-210e_coco-256x192-216eae50_20230314.json) | +| [ViTPose-B-humanart-coco](/configs/body_2d_keypoint/topdown_heatmap/humanart/td-hm_ViTPose-base_8xb64-210e_humanart-256x192.py) | 256x192 | 0.758 | 0.906 | 0.829 | 0.812 | 0.946 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/human_art/td-hm_ViTPose-base_8xb64-210e_humanart-256x192-b417f546_20230611.pth) |
[log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/human_art/td-hm_ViTPose-base_8xb64-210e_humanart-256x192-b417f546_20230611.json) | diff --git a/configs/body_2d_keypoint/topdown_heatmap/humanart/vitpose_humanart.yml b/configs/body_2d_keypoint/topdown_heatmap/humanart/vitpose_humanart.yml new file mode 100644 index 0000000000..12a557fbf6 --- /dev/null +++ b/configs/body_2d_keypoint/topdown_heatmap/humanart/vitpose_humanart.yml @@ -0,0 +1,79 @@ +Collections: +- Name: ViTPose + Paper: + Title: 'ViTPose: Simple Vision Transformer Baselines for Human Pose Estimation' + URL: https://arxiv.org/abs/2204.12484 + README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/algorithms/vitpose.md + Metadata: + Training Resources: 8x A100 GPUs +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/humanart/td-hm_ViTPose-small_8xb64-210e_humanart-256x192.py + In Collection: ViTPose + Metadata: + Architecture: &id001 + - ViTPose + - Classic Head + Model Size: Small + Training Data: &id002 + - COCO + - Human-Art + Name: td-hm_ViTPose-small_8xb64-210e_humanart-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.737 + AP@0.5: 0.902 + AP@0.75: 0.811 + AR: 0.792 + AR@0.5: 0.942 + Task: Body 2D Keypoint + - Dataset: Human-Art + Metrics: + AP: 0.381 + AP@0.5: 0.532 + AP@0.75: 0.405 + AR: 0.448 + AR@0.5: 0.602 + Task: Body 2D Keypoint + - Dataset: Human-Art(GT) + Metrics: + AP: 0.738 + AP@0.5: 0.905 + AP@0.75: 0.802 + AR: 0.768 + AR@0.5: 0.911 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/human_art/td-hm_ViTPose-small_8xb64-210e_humanart-256x192-5cbe2bfc_20230611.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/humanart/td-hm_ViTPose-base_8xb64-210e_humanart-256x192.py + In Collection: ViTPose + Metadata: + Architecture: *id001 + Model Size: Base + Training Data: *id002 + Name: td-hm_ViTPose-base_8xb64-210e_humanart-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.758 + AP@0.5: 0.906 + AP@0.75: 0.829 + AR: 0.812 + AR@0.5: 0.946 + Task: Body 2D Keypoint + - Dataset: Human-Art + Metrics: + AP: 0.410 + AP@0.5: 0.549 + AP@0.75: 0.434 + AR: 0.475 + AR@0.5: 0.615 + Task: Body 2D Keypoint + - Dataset: Human-Art(GT) + Metrics: + AP: 0.759 + AP@0.5: 0.905 + AP@0.75: 0.823 + AR: 0.790 + AR@0.5: 0.917 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/human_art/td-hm_ViTPose-base_8xb64-210e_humanart-256x192-b417f546_20230611.pth diff --git a/docs/en/dataset_zoo/2d_body_keypoint.md b/docs/en/dataset_zoo/2d_body_keypoint.md index c5bf70a3f8..4448ebe8f4 100644 --- a/docs/en/dataset_zoo/2d_body_keypoint.md +++ b/docs/en/dataset_zoo/2d_body_keypoint.md @@ -13,6 +13,7 @@ MMPose supported datasets: - [CrowdPose](#crowdpose) \[ [Homepage](https://github.com/Jeff-sjtu/CrowdPose) \] - [OCHuman](#ochuman) \[ [Homepage](https://github.com/liruilong940607/OCHumanApi) \] - [MHP](#mhp) \[ [Homepage](https://lv-mhp.github.io/dataset) \] + - [Human-Art](#humanart) \[ [Homepage](https://idea-research.github.io/HumanArt/) \] - Videos - [PoseTrack18](#posetrack18) \[ [Homepage](https://posetrack.net/users/download.php) \] - [sub-JHMDB](#sub-jhmdb-dataset) \[ [Homepage](http://jhmdb.is.tue.mpg.de/dataset) \] @@ -386,6 +387,57 @@ mmpose │ │ │-- ...~~~~ ``` +## Human-Art dataset + + + +
+Human-Art (CVPR'2023) + +```bibtex +@inproceedings{ju2023humanart, + title={Human-Art: A Versatile Human-Centric Dataset Bridging Natural and Artificial Scenes}, + author={Ju, Xuan and Zeng, Ailing and Wang, Jianan and Xu, Qiang and Zhang, Lei}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, + year={2023} +} +``` + +
+ +
+ +
+ +For [Human-Art](https://idea-research.github.io/HumanArt/) data, please download the images and annotation files from [its website](https://idea-research.github.io/HumanArt/). You need to fill in the [data form](https://docs.google.com/forms/d/e/1FAIpQLScroT_jvw6B9U2Qca1_cl5Kmmu1ceKtlh6DJNmWLte8xNEhEw/viewform) to get access to the data. +Move them under $MMPOSE/data, and make them look like this: + +```text +mmpose +├── mmpose +├── docs +├── tests +├── tools +├── configs +|── data + │── HumanArt + │-- images + │ │-- 2D_virtual_human + │ │ |-- cartoon + │ │ | |-- 000000000000.jpg + │ │ | |-- ... + │ │ |-- digital_art + │ │ |-- ... + │ |-- 3D_virtual_human + │ |-- real_human + |-- annotations + │ │-- validation_humanart.json + │ │-- training_humanart_coco.json + |-- person_detection_results + │ │-- HumanArt_validation_detections_AP_H_56_person.json +``` + +Downloading the other annotation files of Human-Art is optional. If you want to use additional annotation files (e.g. the validation set of the cartoon category), you need to edit the corresponding annotation file paths in the config. + ## PoseTrack18 diff --git a/docs/zh_cn/dataset_zoo/2d_body_keypoint.md index c5bf70a3f8..4448ebe8f4 100644 --- a/docs/zh_cn/dataset_zoo/2d_body_keypoint.md +++ b/docs/zh_cn/dataset_zoo/2d_body_keypoint.md @@ -13,6 +13,7 @@ MMPose supported datasets: - [CrowdPose](#crowdpose) \[ [Homepage](https://github.com/Jeff-sjtu/CrowdPose) \] - [OCHuman](#ochuman) \[ [Homepage](https://github.com/liruilong940607/OCHumanApi) \] - [MHP](#mhp) \[ [Homepage](https://lv-mhp.github.io/dataset) \] + - [Human-Art](#humanart) \[ [Homepage](https://idea-research.github.io/HumanArt/) \] - Videos - [PoseTrack18](#posetrack18) \[ [Homepage](https://posetrack.net/users/download.php) \] - [sub-JHMDB](#sub-jhmdb-dataset) \[ [Homepage](http://jhmdb.is.tue.mpg.de/dataset) \] @@ -386,6 +387,57 @@ mmpose │ │ │-- ...~~~~ ``` +## Human-Art dataset + + +
+Human-Art (CVPR'2023) + +```bibtex +@inproceedings{ju2023humanart, + title={Human-Art: A Versatile Human-Centric Dataset Bridging Natural and Artificial Scenes}, + author={Ju, Xuan and Zeng, Ailing and Wang, Jianan and Xu, Qiang and Zhang, Lei}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, + year={2023} +} +``` + +
+ +
+ +
+ +For [Human-Art](https://idea-research.github.io/HumanArt/) data, please download the images and annotation files from [its website](https://idea-research.github.io/HumanArt/). You need to fill in the [data form](https://docs.google.com/forms/d/e/1FAIpQLScroT_jvw6B9U2Qca1_cl5Kmmu1ceKtlh6DJNmWLte8xNEhEw/viewform) to get access to the data. +Move them under $MMPOSE/data, and make them look like this: + +```text +mmpose +├── mmpose +├── docs +├── tests +├── tools +├── configs +|── data + │── HumanArt + │-- images + │ │-- 2D_virtual_human + │ │ |-- cartoon + │ │ | |-- 000000000000.jpg + │ │ | |-- ... + │ │ |-- digital_art + │ │ |-- ... + │ |-- 3D_virtual_human + │ |-- real_human + |-- annotations + │ │-- validation_humanart.json + │ │-- training_humanart_coco.json + |-- person_detection_results + │ │-- HumanArt_validation_detections_AP_H_56_person.json +``` + +Downloading the other annotation files of Human-Art is optional. If you want to use additional annotation files (e.g. the validation set of the cartoon category), you need to edit the corresponding annotation file paths in the config. + ## PoseTrack18 diff --git a/mmpose/datasets/datasets/body/__init__.py b/mmpose/datasets/datasets/body/__init__.py index a4aeef8519..1405b0d675 100644 --- a/mmpose/datasets/datasets/body/__init__.py +++ b/mmpose/datasets/datasets/body/__init__.py @@ -2,6 +2,7 @@ from .aic_dataset import AicDataset from .coco_dataset import CocoDataset from .crowdpose_dataset import CrowdPoseDataset +from .humanart_dataset import HumanArtDataset from .jhmdb_dataset import JhmdbDataset from .mhp_dataset import MhpDataset from .mpii_dataset import MpiiDataset @@ -13,5 +14,5 @@ __all__ = [ 'CocoDataset', 'MpiiDataset', 'MpiiTrbDataset', 'AicDataset', 'CrowdPoseDataset', 'OCHumanDataset', 'MhpDataset', 'PoseTrack18Dataset', - 'JhmdbDataset', 'PoseTrack18VideoDataset' + 'JhmdbDataset', 'PoseTrack18VideoDataset', 'HumanArtDataset' ] diff --git a/mmpose/datasets/datasets/body/humanart_dataset.py b/mmpose/datasets/datasets/body/humanart_dataset.py new file mode 100644 index 0000000000..719f35fc9e --- /dev/null +++ b/mmpose/datasets/datasets/body/humanart_dataset.py @@ -0,0 +1,73 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from mmpose.registry import DATASETS +from ..base import BaseCocoStyleDataset + + +@DATASETS.register_module() +class HumanArtDataset(BaseCocoStyleDataset): + """Human-Art dataset for pose estimation. + + "Human-Art: A Versatile Human-Centric Dataset + Bridging Natural and Artificial Scenes", CVPR'2023. + More details can be found in the `paper + `__ . + + Human-Art keypoints:: + + 0: 'nose', + 1: 'left_eye', + 2: 'right_eye', + 3: 'left_ear', + 4: 'right_ear', + 5: 'left_shoulder', + 6: 'right_shoulder', + 7: 'left_elbow', + 8: 'right_elbow', + 9: 'left_wrist', + 10: 'right_wrist', + 11: 'left_hip', + 12: 'right_hip', + 13: 'left_knee', + 14: 'right_knee', + 15: 'left_ankle', + 16: 'right_ankle' + + Args: + ann_file (str): Annotation file path. Default: ''. + bbox_file (str, optional): Detection result file path. If + ``bbox_file`` is set, detected bboxes loaded from this file will + be used instead of ground-truth bboxes. This setting is only for + evaluation, i.e., ignored when ``test_mode`` is ``False``. + Default: ``None``. + data_mode (str): Specifies the mode of data samples: ``'topdown'`` or + ``'bottomup'``. In ``'topdown'`` mode, each data sample contains + one instance; while in ``'bottomup'`` mode, each data sample + contains all instances in an image. 
Default: ``'topdown'`` + metainfo (dict, optional): Meta information for dataset, such as class + information. Default: ``None``. + data_root (str, optional): The root directory for ``data_prefix`` and + ``ann_file``. Default: ``None``. + data_prefix (dict, optional): Prefix for training data. Default: + ``dict(img=None, ann=None)``. + filter_cfg (dict, optional): Config for filtering data. Default: ``None``. + indices (int or Sequence[int], optional): Support using first few + data in annotation file to facilitate training/testing on a smaller + dataset. Default: ``None``, which means using all ``data_infos``. + serialize_data (bool, optional): Whether to hold memory using + serialized objects; when enabled, data loader workers can use + shared RAM from the master process instead of making a copy. + Default: ``True``. + pipeline (list, optional): Processing pipeline. Default: []. + test_mode (bool, optional): ``test_mode=True`` means in test phase. + Default: ``False``. + lazy_init (bool, optional): Whether to load annotation during + instantiation. In some cases, such as visualization, only the meta + information of the dataset is needed, which is not necessary to + load the annotation file. ``BaseDataset`` can skip loading + annotations to save time by setting ``lazy_init=True``. + Default: ``False``. + max_refetch (int, optional): The maximum number of extra cycles to get + a valid image if ``BaseDataset.prepare_data`` gets a ``None`` + image. Default: 1000. + """ + + METAINFO: dict = dict(from_file='configs/_base_/datasets/humanart.py') diff --git a/tests/data/humanart/2D_virtual_human/digital_art/000000001648.jpg b/tests/data/humanart/2D_virtual_human/digital_art/000000001648.jpg new file mode 100644 index 0000000000..8f2202760b Binary files /dev/null and b/tests/data/humanart/2D_virtual_human/digital_art/000000001648.jpg differ diff --git a/tests/data/humanart/3D_virtual_human/garage_kits/000000005603.jpg b/tests/data/humanart/3D_virtual_human/garage_kits/000000005603.jpg new file mode 100644 index 0000000000..21f551c324 Binary files /dev/null and b/tests/data/humanart/3D_virtual_human/garage_kits/000000005603.jpg differ diff --git a/tests/data/humanart/real_human/acrobatics/000000000590.jpg b/tests/data/humanart/real_human/acrobatics/000000000590.jpg new file mode 100644 index 0000000000..15efbec533 Binary files /dev/null and b/tests/data/humanart/real_human/acrobatics/000000000590.jpg differ diff --git a/tests/data/humanart/test_humanart.json b/tests/data/humanart/test_humanart.json new file mode 100644 index 0000000000..8cf13e3530 --- /dev/null +++ b/tests/data/humanart/test_humanart.json @@ -0,0 +1,716 @@ +{ + "info": { + "description": "For testing Human-Art dataset only.", + "year": 2023, + "date_created": "2023/06/12" + }, + "images": [ + { + "file_name": "HumanArt/images/2D_virtual_human/digital_art/000000001648.jpg", + "height": 1750, + "width": 1280, + "id": 2000000001648, + "page_url": "https://www.deviantart.com/endemilk/art/Autumn-Mood-857953165", + "image_url": 
"https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/f/cef0f0b2-832e-4f53-95c6-32f822f796ac/de6swwd-8ae0bba7-f879-43db-9f34-33d067ea3683.png/v1/fill/w_1280,h_1750,q_80,strp/autumn_mood_by_endemilk_de6swwd-fullview.jpg?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJ1cm46YXBwOjdlMGQxODg5ODIyNjQzNzNhNWYwZDQxNWVhMGQyNmUwIiwiaXNzIjoidXJuOmFwcDo3ZTBkMTg4OTgyMjY0MzczYTVmMGQ0MTVlYTBkMjZlMCIsIm9iaiI6W1t7ImhlaWdodCI6Ijw9MTc1MCIsInBhdGgiOiJcL2ZcL2NlZjBmMGIyLTgzMmUtNGY1My05NWM2LTMyZjgyMmY3OTZhY1wvZGU2c3d3ZC04YWUwYmJhNy1mODc5LTQzZGItOWYzNC0zM2QwNjdlYTM2ODMucG5nIiwid2lkdGgiOiI8PTEyODAifV1dLCJhdWQiOlsidXJuOnNlcnZpY2U6aW1hZ2Uub3BlcmF0aW9ucyJdfQ.u2McWPeJ1MDJokGkVa3qnJlYJFoldamHt9B6rGtSf9Y", + "picture_name": "Autumn Mood", + "author": "Endemilk", + "description": "digital_art, a girl in a white dress standing under a tree with autumn leaves", + "category": "digital art" + }, + { + "file_name": "HumanArt/images/3D_virtual_human/garage_kits/000000005603.jpg", + "height": 600, + "width": 700, + "id": 12000000005603, + "page_url": "https://www.goodsmile.info/ja/product/6010/%E3%81%AD%E3%82%93%E3%81%A9%E3%82%8D%E3%81%84%E3%81%A9+%E3%82%A8%E3%83%83%E3%82%AF%E3%82%B9+%E3%83%95%E3%83%AB%E3%82%A2%E3%83%BC%E3%83%9E%E3%83%BC.html", + "image_url": "https://images.goodsmile.info/cgm/images/product/20161014/6010/41809/large/7b2d02a6a8a8d89af3a34f70942fdcc7.jpg", + "picture_name": "Irregular hunter who wants peace", + "author": "None", + "description": "garage_kits, a figurine of a character holding a gun", + "category": "garage kits" + }, + { + "file_name": "HumanArt/images/real_human/acrobatics/000000000590.jpg", + "height": 612, + "width": 589, + "id": 15000000000590, + "page_url": "https://www.istockphoto.com/hk/search/2/image?phrase=acrobatics&page=", + "image_url": "https://media.istockphoto.com/photos/women-couple-of-dancers-acrobats-picture-id494524123?k=20&m=494524123&s=612x612&w=0&h=Mt-1N5a2aCS3n6spX_Fw8JRmf3zAO2VnvB4T0mGCN4s=", + "picture_name": "None", + "author": "None", + "description": "acrobatics, two women in green and white performing acrobatics", + "category": "acrobatics" + } + ], + "annotations": [ + { + "keypoints": [ + 715.4305, + 970.0197, + 2, + 698.8416, + 942.6802, + 2, + 679.6984, + 941.7231, + 2, + 644.7338, + 948.259, + 2, + 611.9386, + 946.367, + 2, + 518.0118, + 1122.8295, + 2, + 656.3654, + 1106.6009, + 2, + 529.2618, + 1364.4753, + 2, + 589.2787, + 1375.8299, + 2, + 687.9009, + 1377.9864, + 2, + 744.6238, + 1409.0027, + 2, + 557.0198, + 1505.5454, + 2, + 680.6947, + 1499.8197, + 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "keypoints_21": [ + 715.4305, + 970.0197, + 2, + 698.8416, + 942.6802, + 2, + 679.6984, + 941.7231, + 2, + 644.7338, + 948.259, + 2, + 611.9386, + 946.367, + 2, + 518.0118, + 1122.8295, + 2, + 656.3654, + 1106.6009, + 2, + 529.2618, + 1364.4753, + 2, + 589.2787, + 1375.8299, + 2, + 687.9009, + 1377.9864, + 2, + 744.6238, + 1409.0027, + 2, + 557.0198, + 1505.5454, + 2, + 680.6947, + 1499.8197, + 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 711.6695, + 1391.3213, + 2, + 764.9766, + 1420.8272, + 2, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "self_contact": [], + "num_keypoints": 13, + "num_keypoints_21": 15, + "iscrowd": 0, + "image_id": 2000000001648, + "area": 288736.90076053096, + "bbox": [ + 468.61884, + 828.9586400000001, + 355.629312, + 811.9041119999999 + ], + "category_id": 1, + "id": 2000000006746, + "annotator": 67037 + }, + { + "keypoints": [ + 313.972, + 252.666, + 2, + 333.8015, + 228.7117, + 2, + 272.5658, + 
207.7711, + 2, + 342.3681, + 227.4426, + 2, + 200.6833, + 204.2117, + 2, + 0, + 0, + 0, + 251.3643, + 302.7895, + 2, + 0, + 0, + 0, + 275.9871, + 312.5822, + 2, + 0, + 0, + 0, + 292.1347, + 313.8643, + 2, + 304.7952, + 403.3614, + 2, + 286.269, + 402.473, + 2, + 330.7358, + 441.4618, + 2, + 260.2096, + 441.0565, + 2, + 321.9826, + 495.339, + 2, + 222.4324, + 493.9369, + 2 + ], + "keypoints_21": [ + 313.972, + 252.666, + 2, + 333.8015, + 228.7117, + 2, + 272.5658, + 207.7711, + 2, + 342.3681, + 227.4426, + 2, + 200.6833, + 204.2117, + 2, + 0, + 0, + 0, + 251.3643, + 302.7895, + 2, + 0, + 0, + 0, + 275.9871, + 312.5822, + 2, + 0, + 0, + 0, + 292.1347, + 313.8643, + 2, + 304.7952, + 403.3614, + 2, + 286.269, + 402.473, + 2, + 330.7358, + 441.4618, + 2, + 260.2096, + 441.0565, + 2, + 321.9826, + 495.339, + 2, + 222.4324, + 493.9369, + 2, + 0, + 0, + 0, + 0, + 0, + 0, + 398.5162, + 556.9002, + 2, + 212.5182, + 563.4001, + 2 + ], + "self_contact": [], + "num_keypoints": 14, + "num_keypoints_21": 16, + "iscrowd": 0, + "image_id": 12000000005603, + "area": 132932.1180077885, + "bbox": [ + 161.11672, + 132.37402000000003, + 284.87937600000004, + 466.62597999999997 + ], + "category_id": 1, + "id": 12000000076660, + "annotator": 66991 + }, + { + "keypoints": [ + 319.2161, + 546.3765, + 2, + 317.6563, + 536.4973, + 2, + 315.5024, + 536.8374, + 2, + 295.7777, + 539.4827, + 2, + 290.2372, + 538.9287, + 2, + 260.5583, + 539.1473, + 2, + 252.989, + 559.7042, + 2, + 222.0985, + 494.5581, + 2, + 204.3461, + 496.7641, + 2, + 229.7767, + 555.3691, + 2, + 203.9402, + 564.1676, + 2, + 254.6329, + 440.3163, + 2, + 252.7878, + 421.1483, + 2, + 351.9561, + 400.9315, + 2, + 368.0247, + 412.8534, + 2, + 347.6211, + 500.3006, + 2, + 367.0544, + 542.1705, + 2 + ], + "keypoints_21": [ + 319.2161, + 546.3765, + 2, + 317.6563, + 536.4973, + 2, + 315.5024, + 536.8374, + 2, + 295.7777, + 539.4827, + 2, + 290.2372, + 538.9287, + 2, + 260.5583, + 539.1473, + 2, + 252.989, + 559.7042, + 2, + 222.0985, + 494.5581, + 2, + 204.3461, + 496.7641, + 2, + 229.7767, + 555.3691, + 2, + 203.9402, + 564.1676, + 2, + 254.6329, + 440.3163, + 2, + 252.7878, + 421.1483, + 2, + 351.9561, + 400.9315, + 2, + 368.0247, + 412.8534, + 2, + 347.6211, + 500.3006, + 2, + 367.0544, + 542.1705, + 2, + 248.5114, + 559.976, + 2, + 253.5939, + 575.1541, + 2, + 357.1097, + 548.0375, + 2, + 379.7624, + 573.8666, + 2 + ], + "self_contact": [ + [ + 245.1376, + 570.4875 + ] + ], + "num_keypoints": 17, + "num_keypoints_21": 21, + "iscrowd": 0, + "image_id": 15000000000590, + "area": 62008.05021846336, + "bbox": [ + 168.77576, + 366.08698000000004, + 253.18396800000005, + 244.91301999999996 + ], + "category_id": 1, + "id": 15000000092347, + "annotator": 66705 + }, + { + "keypoints": [ + 233.1389, + 406.6037, + 2, + 243.5176, + 397.9166, + 2, + 243.0948, + 396.1787, + 2, + 235.8086, + 380.0257, + 2, + 233.4394, + 371.1951, + 2, + 200.7799, + 367.2566, + 2, + 222.3385, + 339.9251, + 2, + 218.5684, + 431.6162, + 2, + 216.3631, + 433.129, + 2, + 238.3363, + 495.4999, + 2, + 240.2118, + 500.6888, + 2, + 253.2291, + 222.9011, + 2, + 270.424, + 250.1, + 2, + 192.7242, + 138.9058, + 2, + 372.9364, + 324.4092, + 2, + 148.4319, + 79.9982, + 2, + 444.6949, + 407.9868, + 2 + ], + "keypoints_21": [ + 233.1389, + 406.6037, + 2, + 243.5176, + 397.9166, + 2, + 243.0948, + 396.1787, + 2, + 235.8086, + 380.0257, + 2, + 233.4394, + 371.1951, + 2, + 200.7799, + 367.2566, + 2, + 222.3385, + 339.9251, + 2, + 218.5684, + 431.6162, + 2, + 216.3631, + 433.129, + 2, + 238.3363, + 
495.4999, + 2, + 240.2118, + 500.6888, + 2, + 253.2291, + 222.9011, + 2, + 270.424, + 250.1, + 2, + 192.7242, + 138.9058, + 2, + 372.9364, + 324.4092, + 2, + 148.4319, + 79.9982, + 2, + 444.6949, + 407.9868, + 2, + 245.196, + 517.5082, + 2, + 238.3205, + 541.3807, + 2, + 113.9739, + 40.4267, + 2, + 501.7295, + 448.3217, + 2 + ], + "self_contact": [], + "num_keypoints": 17, + "num_keypoints_21": 21, + "iscrowd": 0, + "image_id": 15000000000590, + "area": 337013.68142, + "bbox": [ + 36.42278, + 0, + 551.57722, + 611 + ], + "category_id": 1, + "id": 15000000092348, + "annotator": 66705 + } + ], + "categories": [ + { + "supercategory": "person", + "id": 1, + "name": "person", + "keypoints": [ + "nose", + "left_eye", + "right_eye", + "left_ear", + "right_ear", + "left_shoulder", + "right_shoulder", + "left_elbow", + "right_elbow", + "left_wrist", + "right_wrist", + "left_hip", + "right_hip", + "left_knee", + "right_knee", + "left_ankle", + "right_ankle", + "left_finger", + "right_finger", + "left_toe", + "right_toe" + ], + "skeleton": [ + [ + 20, + 16 + ], + [ + 16, + 14 + ], + [ + 14, + 12 + ], + [ + 21, + 17 + ], + [ + 17, + 15 + ], + [ + 15, + 13 + ], + [ + 12, + 13 + ], + [ + 6, + 12 + ], + [ + 7, + 13 + ], + [ + 6, + 7 + ], + [ + 6, + 8 + ], + [ + 7, + 9 + ], + [ + 10, + 18 + ], + [ + 8, + 10 + ], + [ + 11, + 19 + ], + [ + 9, + 11 + ], + [ + 2, + 3 + ], + [ + 1, + 2 + ], + [ + 1, + 3 + ], + [ + 2, + 4 + ], + [ + 3, + 5 + ], + [ + 4, + 6 + ], + [ + 5, + 7 + ] + ] + } + ] +} \ No newline at end of file diff --git a/tests/data/humanart/test_humanart_det_AP_H_56.json b/tests/data/humanart/test_humanart_det_AP_H_56.json new file mode 100644 index 0000000000..753caa0c07 --- /dev/null +++ b/tests/data/humanart/test_humanart_det_AP_H_56.json @@ -0,0 +1,145 @@ +[ + { + "bbox": [ + 411.55450439453125, + 773.5175170898438, + 925.8963623046875, + 1736.38623046875 + ], + "category_id": 1, + "image_id": 2000000001648, + "score": 0.9018925428390503 + }, + { + "bbox": [ + 23.97265625, + 19.622175216674805, + 1121.828369140625, + 1269.2109375 + ], + "category_id": 1, + "image_id": 2000000001648, + "score": 0.4558742344379425 + }, + { + "bbox": [ + 82.678466796875, + 475.8934020996094, + 1093.4742431640625, + 1717.331298828125 + ], + "category_id": 1, + "image_id": 2000000001648, + "score": 0.37606894969940186 + }, + { + "bbox": [ + 393.59222412109375, + 125.75264739990234, + 895.0135498046875, + 1201.154296875 + ], + "category_id": 1, + "image_id": 2000000001648, + "score": 0.08204865455627441 + }, + { + "bbox": [ + 75.03559875488281, + 52.54023742675781, + 759.2489624023438, + 974.7556762695312 + ], + "category_id": 1, + "image_id": 2000000001648, + "score": 0.07333727180957794 + }, + { + "bbox": [ + 197.08047485351562, + 139.95877075195312, + 402.2601318359375, + 591.4268188476562 + ], + "category_id": 1, + "image_id": 12000000005603, + "score": 0.9604519009590149 + }, + { + "bbox": [ + 67.07928466796875, + 132.88070678710938, + 535.9130249023438, + 600.0 + ], + "category_id": 1, + "image_id": 12000000005603, + "score": 0.10827567428350449 + }, + { + "bbox": [ + 21.64974594116211, + 0.0, + 564.9321899414062, + 592.8584594726562 + ], + "category_id": 1, + "image_id": 15000000000590, + "score": 0.9986042380332947 + }, + { + "bbox": [ + 158.69786071777344, + 249.30482482910156, + 410.9751281738281, + 608.938720703125 + ], + "category_id": 1, + "image_id": 15000000000590, + "score": 0.7594972252845764 + }, + { + "bbox": [ + 184.25045776367188, + 370.5571594238281, + 361.1768493652344, + 601.1585083007812 + 
], + "category_id": 1, + "image_id": 15000000000590, + "score": 0.26641231775283813 + }, + { + "bbox": [ + 129.24253845214844, + 251.26560974121094, + 552.2449951171875, + 517.3319702148438 + ], + "category_id": 1, + "image_id": 15000000000590, + "score": 0.05408962443470955 + }, + { + "bbox": [ + 168.77576, + 366.08698000000004, + 421.95972800000004, + 611.0 + ], + "category_id": 1, + "image_id": 15000000000590, + "score": 0.6465661513194214 + }, + { + "bbox": [ + 36.42278, + 0.0, + 588.0, + 611.0 + ], + "category_id": 1, + "image_id": 15000000000590, + "score": 0.844070429325392 + } +] \ No newline at end of file diff --git a/tests/test_datasets/test_datasets/test_body_datasets/test_humanart_dataset.py b/tests/test_datasets/test_datasets/test_body_datasets/test_humanart_dataset.py new file mode 100644 index 0000000000..dcf29ab692 --- /dev/null +++ b/tests/test_datasets/test_datasets/test_body_datasets/test_humanart_dataset.py @@ -0,0 +1,160 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from unittest import TestCase + +import numpy as np + +from mmpose.datasets.datasets.body import HumanArtDataset + + +class TestHumanartDataset(TestCase): + + def build_humanart_dataset(self, **kwargs): + + cfg = dict( + ann_file='test_humanart.json', + bbox_file=None, + data_mode='topdown', + data_root='tests/data/humanart', + pipeline=[], + test_mode=False) + + cfg.update(kwargs) + return HumanArtDataset(**cfg) + + def check_data_info_keys(self, + data_info: dict, + data_mode: str = 'topdown'): + if data_mode == 'topdown': + expected_keys = dict( + img_id=int, + img_path=str, + bbox=np.ndarray, + bbox_score=np.ndarray, + keypoints=np.ndarray, + keypoints_visible=np.ndarray, + id=int) + elif data_mode == 'bottomup': + expected_keys = dict( + img_id=int, + img_path=str, + bbox=np.ndarray, + bbox_score=np.ndarray, + keypoints=np.ndarray, + keypoints_visible=np.ndarray, + invalid_segs=list, + id=list) + else: + raise ValueError(f'Invalid data_mode {data_mode}') + + for key, type_ in expected_keys.items(): + self.assertIn(key, data_info) + self.assertIsInstance(data_info[key], type_, key) + + def check_metainfo_keys(self, metainfo: dict): + expected_keys = dict( + dataset_name=str, + num_keypoints=int, + keypoint_id2name=dict, + keypoint_name2id=dict, + upper_body_ids=list, + lower_body_ids=list, + flip_indices=list, + flip_pairs=list, + keypoint_colors=np.ndarray, + num_skeleton_links=int, + skeleton_links=list, + skeleton_link_colors=np.ndarray, + dataset_keypoint_weights=np.ndarray) + + for key, type_ in expected_keys.items(): + self.assertIn(key, metainfo) + self.assertIsInstance(metainfo[key], type_, key) + + def test_metainfo(self): + dataset = self.build_humanart_dataset() + self.check_metainfo_keys(dataset.metainfo) + # test dataset_name + self.assertEqual(dataset.metainfo['dataset_name'], 'Human-Art') + + # test number of keypoints + num_keypoints = 17 + self.assertEqual(dataset.metainfo['num_keypoints'], num_keypoints) + self.assertEqual( + len(dataset.metainfo['keypoint_colors']), num_keypoints) + self.assertEqual( + len(dataset.metainfo['dataset_keypoint_weights']), num_keypoints) + # note that len(sigmas) may be zero if dataset.metainfo['sigmas'] = [] + self.assertEqual(len(dataset.metainfo['sigmas']), num_keypoints) + + # test some extra metainfo + self.assertEqual( + len(dataset.metainfo['skeleton_links']), + len(dataset.metainfo['skeleton_link_colors'])) + + def test_topdown(self): + # test topdown training + dataset = self.build_humanart_dataset(data_mode='topdown') + 
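+        # The bundled test_humanart.json contains 4 person instances across
+        # 3 images, so topdown mode yields one data sample per instance.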
self.assertEqual(len(dataset), 4) + self.check_data_info_keys(dataset[0], data_mode='topdown') + + # test topdown testing + dataset = self.build_humanart_dataset( + data_mode='topdown', test_mode=True) + self.assertEqual(len(dataset), 4) + self.check_data_info_keys(dataset[0], data_mode='topdown') + + # test topdown testing with bbox file + dataset = self.build_humanart_dataset( + data_mode='topdown', + test_mode=True, + bbox_file='tests/data/humanart/test_humanart_det_AP_H_56.json') + self.assertEqual(len(dataset), 13) + self.check_data_info_keys(dataset[0], data_mode='topdown') + + # test topdown testing with filter config + dataset = self.build_humanart_dataset( + data_mode='topdown', + test_mode=True, + bbox_file='tests/data/humanart/test_humanart_det_AP_H_56.json', + filter_cfg=dict(bbox_score_thr=0.3)) + self.assertEqual(len(dataset), 8) + + def test_bottomup(self): + # test bottomup training + dataset = self.build_humanart_dataset(data_mode='bottomup') + self.assertEqual(len(dataset), 3) + self.check_data_info_keys(dataset[0], data_mode='bottomup') + + # test bottomup testing + dataset = self.build_humanart_dataset( + data_mode='bottomup', test_mode=True) + self.assertEqual(len(dataset), 3) + self.check_data_info_keys(dataset[0], data_mode='bottomup') + + def test_exceptions_and_warnings(self): + + with self.assertRaisesRegex(ValueError, 'got invalid data_mode'): + _ = self.build_humanart_dataset(data_mode='invalid') + + with self.assertRaisesRegex( + ValueError, + '"bbox_file" is only supported when `test_mode==True`'): + _ = self.build_humanart_dataset( + data_mode='topdown', + test_mode=False, + bbox_file='tests/data/humanart/test_humanart_det_AP_H_56.json') + + with self.assertRaisesRegex( + ValueError, '"bbox_file" is only supported in topdown mode'): + _ = self.build_humanart_dataset( + data_mode='bottomup', + test_mode=True, + bbox_file='tests/data/humanart/test_humanart_det_AP_H_56.json') + + with self.assertRaisesRegex( + ValueError, + '"bbox_score_thr" is only supported in topdown mode'): + _ = self.build_humanart_dataset( + data_mode='bottomup', + test_mode=True, + filter_cfg=dict(bbox_score_thr=0.3)) diff --git a/tools/dist_train.sh b/tools/dist_train.sh old mode 100644 new mode 100755
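As a quick end-to-end check of the pieces added in this diff, the registered dataset class can be exercised against the bundled test assets. This is a minimal sketch mirroring the unit test's setup (run from the repository root so that `tests/data/humanart` is present); swapping `ann_file` here, or in the config's dataloaders, is likewise how the optional per-category annotation files mentioned in the docs would be plugged in.

```python
# Instantiate the new HumanArtDataset on the 3-image test fixture.
from mmpose.datasets.datasets.body import HumanArtDataset

dataset = HumanArtDataset(
    ann_file='test_humanart.json',
    data_root='tests/data/humanart',
    data_mode='topdown',
    pipeline=[],
    test_mode=True)

print(dataset.metainfo['dataset_name'])  # 'Human-Art'
print(len(dataset))  # 4: one topdown sample per annotated instance
```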