diff --git a/README.md b/README.md
index 951c4adf2e..c14110ffd3 100644
--- a/README.md
+++ b/README.md
@@ -284,6 +284,7 @@ A summary can be found in the [Model Zoo](https://mmpose.readthedocs.io/en/lates
- [x] [InterHand2.6M](https://mmpose.readthedocs.io/en/latest/model_zoo_papers/datasets.html#interhand2-6m-eccv-2020) \[[homepage](https://mks0601.github.io/InterHand2.6M/)\] (ECCV'2020)
- [x] [AP-10K](https://mmpose.readthedocs.io/en/latest/model_zoo_papers/datasets.html#ap-10k-neurips-2021) \[[homepage](https://github.com/AlexTheBad/AP-10K)\] (NeurIPS'2021)
- [x] [Horse-10](https://mmpose.readthedocs.io/en/latest/model_zoo_papers/datasets.html#horse-10-wacv-2021) \[[homepage](http://www.mackenziemathislab.org/horse10)\] (WACV'2021)
+- [x] [Human-Art](https://mmpose.readthedocs.io/en/latest/model_zoo_papers/datasets.html#human-art-cvpr-2023) \[[homepage](https://idea-research.github.io/HumanArt/)\] (CVPR'2023)
diff --git a/README_CN.md b/README_CN.md
index 49a956cab9..f3a6af62f9 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -282,6 +282,7 @@ MMPose v1.0.0 是一个重大更新,包括了大量的 API 和配置文件的
- [x] [InterHand2.6M](https://mmpose.readthedocs.io/zh_CN/latest/model_zoo_papers/datasets.html#interhand2-6m-eccv-2020) \[[主页](https://mks0601.github.io/InterHand2.6M/)\] (ECCV'2020)
- [x] [AP-10K](https://mmpose.readthedocs.io/en/latest/model_zoo_papers/datasets.html#ap-10k-neurips-2021) \[[主页](https://github.com/AlexTheBad/AP-10K)\] (NeurIPS'2021)
- [x] [Horse-10](https://mmpose.readthedocs.io/zh_CN/latest/model_zoo_papers/datasets.html#horse-10-wacv-2021) \[[主页](http://www.mackenziemathislab.org/horse10)\] (WACV'2021)
+- [x] [Human-Art](https://mmpose.readthedocs.io/zh_CN/latest/model_zoo_papers/datasets.html#human-art-cvpr-2023) \[[主页](https://idea-research.github.io/HumanArt/)\] (CVPR'2023)
diff --git a/configs/_base_/datasets/humanart.py b/configs/_base_/datasets/humanart.py
new file mode 100644
index 0000000000..b549269b69
--- /dev/null
+++ b/configs/_base_/datasets/humanart.py
@@ -0,0 +1,181 @@
+dataset_info = dict(
+ dataset_name='Human-Art',
+ paper_info=dict(
+ author='Ju, Xuan and Zeng, Ailing and '
+ 'Wang, Jianan and Xu, Qiang and Zhang, Lei',
+ title='Human-Art: A Versatile Human-Centric Dataset '
+ 'Bridging Natural and Artificial Scenes',
+ container='Proceedings of the IEEE/CVF Conference on '
+ 'Computer Vision and Pattern Recognition',
+ year='2023',
+ homepage='https://idea-research.github.io/HumanArt/',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''),
+ 1:
+ dict(
+ name='left_eye',
+ id=1,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_eye'),
+ 2:
+ dict(
+ name='right_eye',
+ id=2,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_eye'),
+ 3:
+ dict(
+ name='left_ear',
+ id=3,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_ear'),
+ 4:
+ dict(
+ name='right_ear',
+ id=4,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_ear'),
+ 5:
+ dict(
+ name='left_shoulder',
+ id=5,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 6:
+ dict(
+ name='right_shoulder',
+ id=6,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 7:
+ dict(
+ name='left_elbow',
+ id=7,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 8:
+ dict(
+ name='right_elbow',
+ id=8,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 9:
+ dict(
+ name='left_wrist',
+ id=9,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist'),
+ 10:
+ dict(
+ name='right_wrist',
+ id=10,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 11:
+ dict(
+ name='left_hip',
+ id=11,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 12:
+ dict(
+ name='right_hip',
+ id=12,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 13:
+ dict(
+ name='left_knee',
+ id=13,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 14:
+ dict(
+ name='right_knee',
+ id=14,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 15:
+ dict(
+ name='left_ankle',
+ id=15,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle'),
+ 16:
+ dict(
+ name='right_ankle',
+ id=16,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_ankle')
+ },
+ skeleton_info={
+ 0:
+ dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
+ 1:
+ dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
+ 2:
+ dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),
+ 5:
+ dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]),
+ 6:
+ dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]),
+ 7:
+ dict(
+ link=('left_shoulder', 'right_shoulder'),
+ id=7,
+ color=[51, 153, 255]),
+ 8:
+ dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),
+ 9:
+ dict(
+ link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
+ 10:
+ dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
+ 11:
+ dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
+ 12:
+ dict(link=('left_eye', 'right_eye'), id=12, color=[51, 153, 255]),
+ 13:
+ dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),
+ 14:
+ dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),
+ 15:
+ dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]),
+ 16:
+ dict(link=('right_eye', 'right_ear'), id=16, color=[51, 153, 255]),
+ 17:
+ dict(link=('left_ear', 'left_shoulder'), id=17, color=[51, 153, 255]),
+ 18:
+ dict(
+ link=('right_ear', 'right_shoulder'), id=18, color=[51, 153, 255])
+ },
+ joint_weights=[
+ 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5,
+ 1.5
+ ],
+ sigmas=[
+ 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,
+ 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
+ ])
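+
+# Usage sketch (assumption: standard MMPose 1.x dataset fields): other
+# configs can reuse this metainfo file through `from_file`, e.g.
+#
+#   dataset = dict(
+#       type='HumanArtDataset',
+#       metainfo=dict(from_file='configs/_base_/datasets/humanart.py'),
+#       ...)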
diff --git a/configs/_base_/datasets/humanart_aic.py b/configs/_base_/datasets/humanart_aic.py
new file mode 100644
index 0000000000..e999427536
--- /dev/null
+++ b/configs/_base_/datasets/humanart_aic.py
@@ -0,0 +1,205 @@
+dataset_info = dict(
+ dataset_name='humanart',
+ paper_info=[
+ dict(
+ author='Ju, Xuan and Zeng, Ailing and '
+ 'Wang, Jianan and Xu, Qiang and Zhang, '
+ 'Lei',
+ title='Human-Art: A Versatile Human-Centric Dataset '
+ 'Bridging Natural and Artificial Scenes',
+ container='CVPR',
+ year='2023',
+ homepage='https://idea-research.github.io/HumanArt/',
+ ),
+ dict(
+ author='Wu, Jiahong and Zheng, He and Zhao, Bo and '
+ 'Li, Yixin and Yan, Baoming and Liang, Rui and '
+ 'Wang, Wenjia and Zhou, Shipei and Lin, Guosen and '
+ 'Fu, Yanwei and others',
+ title='Ai challenger: A large-scale dataset for going '
+ 'deeper in image understanding',
+ container='arXiv',
+ year='2017',
+ homepage='https://github.com/AIChallenger/AI_Challenger_2017',
+ ),
+ ],
+ keypoint_info={
+ 0:
+ dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''),
+ 1:
+ dict(
+ name='left_eye',
+ id=1,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_eye'),
+ 2:
+ dict(
+ name='right_eye',
+ id=2,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_eye'),
+ 3:
+ dict(
+ name='left_ear',
+ id=3,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_ear'),
+ 4:
+ dict(
+ name='right_ear',
+ id=4,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_ear'),
+ 5:
+ dict(
+ name='left_shoulder',
+ id=5,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 6:
+ dict(
+ name='right_shoulder',
+ id=6,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 7:
+ dict(
+ name='left_elbow',
+ id=7,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 8:
+ dict(
+ name='right_elbow',
+ id=8,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 9:
+ dict(
+ name='left_wrist',
+ id=9,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist'),
+ 10:
+ dict(
+ name='right_wrist',
+ id=10,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 11:
+ dict(
+ name='left_hip',
+ id=11,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 12:
+ dict(
+ name='right_hip',
+ id=12,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 13:
+ dict(
+ name='left_knee',
+ id=13,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 14:
+ dict(
+ name='right_knee',
+ id=14,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 15:
+ dict(
+ name='left_ankle',
+ id=15,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle'),
+ 16:
+ dict(
+ name='right_ankle',
+ id=16,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_ankle'),
+ 17:
+ dict(
+ name='head_top',
+ id=17,
+ color=[51, 153, 255],
+ type='upper',
+ swap=''),
+ 18:
+ dict(name='neck', id=18, color=[51, 153, 255], type='upper', swap='')
+ },
+ skeleton_info={
+ 0:
+ dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
+ 1:
+ dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
+ 2:
+ dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),
+ 5:
+ dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]),
+ 6:
+ dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]),
+ 7:
+ dict(
+ link=('left_shoulder', 'right_shoulder'),
+ id=7,
+ color=[51, 153, 255]),
+ 8:
+ dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),
+ 9:
+ dict(
+ link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
+ 10:
+ dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
+ 11:
+ dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
+ 12:
+ dict(link=('left_eye', 'right_eye'), id=12, color=[51, 153, 255]),
+ 13:
+ dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),
+ 14:
+ dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),
+ 15:
+ dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]),
+ 16:
+ dict(link=('right_eye', 'right_ear'), id=16, color=[51, 153, 255]),
+ 17:
+ dict(link=('left_ear', 'left_shoulder'), id=17, color=[51, 153, 255]),
+ 18:
+ dict(
+ link=('right_ear', 'right_shoulder'), id=18, color=[51, 153, 255]),
+ 19:
+        dict(link=('head_top', 'neck'), id=19, color=[51, 153, 255]),
+ },
+    joint_weights=[
+        1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5,
+        1.5, 1.5, 1.5
+    ],
+ sigmas=[
+ 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,
+ 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089, 0.026, 0.026
+ ])
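+
+# Consistency-check sketch (run manually, not part of the config): the 19
+# keypoints are the 17 COCO keypoints plus head_top and neck from AI
+# Challenger, so every per-keypoint field must have 19 entries:
+#
+#   assert len(dataset_info['keypoint_info']) == 19
+#   assert len(dataset_info['joint_weights']) == 19
+#   assert len(dataset_info['sigmas']) == 19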
diff --git a/configs/body_2d_keypoint/rtmpose/README.md b/configs/body_2d_keypoint/rtmpose/README.md
index 3037974917..38fd938376 100644
--- a/configs/body_2d_keypoint/rtmpose/README.md
+++ b/configs/body_2d_keypoint/rtmpose/README.md
@@ -37,3 +37,21 @@ Results on CrowdPose test with [YOLOv3](https://github.com/eriklindernoren/PyTor
| Model | Input Size | AP | AR | Details and Download |
| :-------: | :--------: | :---: | :---: | :------------------------------------------------------: |
| RTMPose-m | 256x192 | 0.706 | 0.788 | [rtmpose_crowdpose.md](./crowdpose/rtmpose_crowdpose.md) |
+
+### Human-Art Dataset
+
+Results on the Human-Art validation set, using a detector with a human AP of 56.2 on the same set
+
+| Model | Input Size | AP | AR | Details and Download |
+| :-------: | :--------: | :---: | :---: | :---------------------------------------------------: |
+| RTMPose-s | 256x192 | 0.311 | 0.381 | [rtmpose_humanart.md](./humanart/rtmpose_humanart.md) |
+| RTMPose-m | 256x192 | 0.355 | 0.417 | [rtmpose_humanart.md](./humanart/rtmpose_humanart.md) |
+| RTMPose-l | 256x192 | 0.378 | 0.442 | [rtmpose_humanart.md](./humanart/rtmpose_humanart.md) |
+
+Results on the Human-Art validation set with ground-truth bounding boxes
+
+| Model | Input Size | AP | AR | Details and Download |
+| :-------: | :--------: | :---: | :---: | :---------------------------------------------------: |
+| RTMPose-s | 256x192 | 0.698 | 0.732 | [rtmpose_humanart.md](./humanart/rtmpose_humanart.md) |
+| RTMPose-m | 256x192 | 0.728 | 0.759 | [rtmpose_humanart.md](./humanart/rtmpose_humanart.md) |
+| RTMPose-l | 256x192 | 0.753 | 0.783 | [rtmpose_humanart.md](./humanart/rtmpose_humanart.md) |
diff --git a/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-l_8xb256-420e_humanart-256x192.py b/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-l_8xb256-420e_humanart-256x192.py
new file mode 100644
index 0000000000..384a712d95
--- /dev/null
+++ b/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-l_8xb256-420e_humanart-256x192.py
@@ -0,0 +1,232 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+max_epochs = 420
+stage2_num_epochs = 30
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr from 210 to 420 epoch
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=1024)
+
+# codec settings
+codec = dict(
+ type='SimCCLabel',
+ input_size=(192, 256),
+ sigma=(4.9, 5.66),
+ simcc_split_ratio=2.0,
+ normalize=False,
+ use_dark=False)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=1.,
+ widen_factor=1.,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
+ 'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth' # noqa
+ )),
+ head=dict(
+ type='RTMCCHead',
+ in_channels=1024,
+ out_channels=17,
+ input_size=codec['input_size'],
+ in_featuremap_size=(6, 8),
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ final_layer_kernel_size=7,
+ gau_cfg=dict(
+ hidden_dims=256,
+ s=128,
+ expansion_factor=2,
+ dropout_rate=0.,
+ drop_path=0.,
+ act_fn='SiLU',
+ use_rel_bias=False,
+ pos_enc=False),
+ loss=dict(
+ type='KLDiscretLoss',
+ use_target_weight=True,
+ beta=10.,
+ label_softmax=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True))
+
+# base dataset settings
+dataset_type = 'HumanArtDataset'
+data_mode = 'topdown'
+data_root = 'data/'
+
+backend_args = dict(backend='local')
+# backend_args = dict(
+# backend='petrel',
+# path_mapping=dict({
+# f'{data_root}': 's3://openmmlab/datasets/detection/coco/',
+# f'{data_root}': 's3://openmmlab/datasets/detection/coco/'
+# }))
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=256,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='HumanArt/annotations/training_humanart_coco.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='HumanArt/annotations/validation_humanart.json',
+ # bbox_file=f'{data_root}HumanArt/person_detection_results/'
+ # 'HumanArt_validation_detections_AP_H_56_person.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'HumanArt/annotations/validation_humanart.json')
+test_evaluator = val_evaluator
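+
+# Notes (sketch, standard MMPose entry points): the training annotation file
+# `training_humanart_coco.json` merges the Human-Art and COCO train
+# annotations, which is why this model is listed as trained on COCO +
+# Human-Art. Training can be launched with, e.g.:
+#   python tools/train.py \
+#       configs/body_2d_keypoint/rtmpose/humanart/rtmpose-l_8xb256-420e_humanart-256x192.py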
diff --git a/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-m_8xb256-420e_humanart-256x192.py b/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-m_8xb256-420e_humanart-256x192.py
new file mode 100644
index 0000000000..30178cbb6d
--- /dev/null
+++ b/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-m_8xb256-420e_humanart-256x192.py
@@ -0,0 +1,232 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+max_epochs = 420
+stage2_num_epochs = 30
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr from 210 to 420 epoch
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=1024)
+
+# codec settings
+codec = dict(
+ type='SimCCLabel',
+ input_size=(192, 256),
+ sigma=(4.9, 5.66),
+ simcc_split_ratio=2.0,
+ normalize=False,
+ use_dark=False)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=0.67,
+ widen_factor=0.75,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
+ 'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth' # noqa
+ )),
+ head=dict(
+ type='RTMCCHead',
+ in_channels=768,
+ out_channels=17,
+ input_size=codec['input_size'],
+ in_featuremap_size=(6, 8),
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ final_layer_kernel_size=7,
+ gau_cfg=dict(
+ hidden_dims=256,
+ s=128,
+ expansion_factor=2,
+ dropout_rate=0.,
+ drop_path=0.,
+ act_fn='SiLU',
+ use_rel_bias=False,
+ pos_enc=False),
+ loss=dict(
+ type='KLDiscretLoss',
+ use_target_weight=True,
+ beta=10.,
+ label_softmax=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True))
+
+# base dataset settings
+dataset_type = 'HumanArtDataset'
+data_mode = 'topdown'
+data_root = 'data/'
+
+backend_args = dict(backend='local')
+# backend_args = dict(
+# backend='petrel',
+# path_mapping=dict({
+# f'{data_root}': 's3://openmmlab/datasets/detection/coco/',
+# f'{data_root}': 's3://openmmlab/datasets/detection/coco/'
+# }))
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=256,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='HumanArt/annotations/training_humanart_coco.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='HumanArt/annotations/validation_humanart.json',
+ # bbox_file=f'{data_root}HumanArt/person_detection_results/'
+ # 'HumanArt_validation_detections_AP_H_56_person.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'HumanArt/annotations/validation_humanart.json')
+test_evaluator = val_evaluator
diff --git a/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-s_8xb256-420e_humanart-256x192.py b/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-s_8xb256-420e_humanart-256x192.py
new file mode 100644
index 0000000000..b4263f25e7
--- /dev/null
+++ b/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-s_8xb256-420e_humanart-256x192.py
@@ -0,0 +1,232 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+max_epochs = 420
+stage2_num_epochs = 30
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr from 210 to 420 epoch
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=1024)
+
+# codec settings
+codec = dict(
+ type='SimCCLabel',
+ input_size=(192, 256),
+ sigma=(4.9, 5.66),
+ simcc_split_ratio=2.0,
+ normalize=False,
+ use_dark=False)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=0.33,
+ widen_factor=0.5,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
+ 'rtmpose/cspnext-s_udp-aic-coco_210e-256x192-92f5a029_20230130.pth' # noqa
+ )),
+ head=dict(
+ type='RTMCCHead',
+ in_channels=512,
+ out_channels=17,
+ input_size=codec['input_size'],
+ in_featuremap_size=(6, 8),
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ final_layer_kernel_size=7,
+ gau_cfg=dict(
+ hidden_dims=256,
+ s=128,
+ expansion_factor=2,
+ dropout_rate=0.,
+ drop_path=0.,
+ act_fn='SiLU',
+ use_rel_bias=False,
+ pos_enc=False),
+ loss=dict(
+ type='KLDiscretLoss',
+ use_target_weight=True,
+ beta=10.,
+ label_softmax=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True))
+
+# base dataset settings
+dataset_type = 'HumanArtDataset'
+data_mode = 'topdown'
+data_root = 'data/'
+
+backend_args = dict(backend='local')
+# backend_args = dict(
+# backend='petrel',
+# path_mapping=dict({
+# f'{data_root}': 's3://openmmlab/datasets/detection/coco/',
+# f'{data_root}': 's3://openmmlab/datasets/detection/coco/'
+# }))
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=256,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='HumanArt/annotations/training_humanart_coco.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='HumanArt/annotations/validation_humanart.json',
+ # bbox_file=f'{data_root}HumanArt/person_detection_results/'
+ # 'HumanArt_validation_detections_AP_H_56_person.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'HumanArt/annotations/validation_humanart.json')
+test_evaluator = val_evaluator
diff --git a/configs/body_2d_keypoint/rtmpose/humanart/rtmpose_humanart.md b/configs/body_2d_keypoint/rtmpose/humanart/rtmpose_humanart.md
new file mode 100644
index 0000000000..bfd925b2c8
--- /dev/null
+++ b/configs/body_2d_keypoint/rtmpose/humanart/rtmpose_humanart.md
@@ -0,0 +1,110 @@
+
+
+
+RTMPose (arXiv'2023)
+
+```bibtex
+@misc{https://doi.org/10.48550/arxiv.2303.07399,
+ doi = {10.48550/ARXIV.2303.07399},
+ url = {https://arxiv.org/abs/2303.07399},
+ author = {Jiang, Tao and Lu, Peng and Zhang, Li and Ma, Ningsheng and Han, Rui and Lyu, Chengqi and Li, Yining and Chen, Kai},
+ keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences},
+ title = {RTMPose: Real-Time Multi-Person Pose Estimation based on MMPose},
+ publisher = {arXiv},
+ year = {2023},
+ copyright = {Creative Commons Attribution 4.0 International}
+}
+
+```
+
+
+
+
+
+
+RTMDet (arXiv'2022)
+
+```bibtex
+@misc{lyu2022rtmdet,
+ title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors},
+ author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen},
+ year={2022},
+ eprint={2212.07784},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+
+Human-Art (CVPR'2023)
+
+```bibtex
+@inproceedings{ju2023humanart,
+ title={Human-Art: A Versatile Human-Centric Dataset Bridging Natural and Artificial Scenes},
+  author={Ju, Xuan and Zeng, Ailing and Wang, Jianan and Xu, Qiang and Zhang, Lei},
+  booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+  year={2023}
+}
+```
+
+
+
+Results on the Human-Art validation set, using a detector with a human AP of 56.2 on the same set
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [rtmpose-s-coco](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_coco-256x192.py) | 256x192 | 0.199 | 0.328 | 0.198 | 0.261 | 0.418 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_simcc-coco_pt-aic-coco_420e-256x192-8edcf0d7_20230127.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_simcc-coco_pt-aic-coco_420e-256x192-8edcf0d7_20230127.json) |
+| [rtmpose-s-humanart-coco](/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-s_8xb256-420e_humanart-256x192.py) | 256x192 | 0.311 | 0.462 | 0.323 | 0.381 | 0.540 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_8xb256-420e_humanart-256x192-5a3ac943_20230611.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_8xb256-420e_humanart-256x192-5a3ac943_20230611.json) |
+| [rtmpose-m-coco](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_coco-256x192.py) | 256x192 | 0.239 | 0.372 | 0.243 | 0.302 | 0.455 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco_pt-aic-coco_420e-256x192-d8dd5ca4_20230127.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco_pt-aic-coco_420e-256x192-d8dd5ca4_20230127.json) |
+| [rtmpose-m-humanart-coco](/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-m_8xb256-420e_humanart-256x192.py) | 256x192 | 0.355 | 0.503 | 0.377 | 0.417 | 0.568 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_8xb256-420e_humanart-256x192-8430627b_20230611.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_8xb256-420e_humanart-256x192-8430627b_20230611.json) |
+| [rtmpose-l-coco](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_coco-256x192.py) | 256x192 | 0.260 | 0.393 | 0.267 | 0.323 | 0.472 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco_pt-aic-coco_420e-256x192-1352a4d2_20230127.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco_pt-aic-coco_420e-256x192-1352a4d2_20230127.json) |
+| [rtmpose-l-humanart-coco](/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-l_8xb256-420e_humanart-256x192.py) | 256x192 | 0.378 | 0.521 | 0.399 | 0.442 | 0.584 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_8xb256-420e_humanart-256x192-389f2cb0_20230611.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_8xb256-420e_humanart-256x192-389f2cb0_20230611.json) |
+
+Results on the Human-Art validation set with ground-truth bounding boxes
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [rtmpose-s-coco](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_coco-256x192.py) | 256x192 | 0.480 | 0.739 | 0.498 | 0.521 | 0.763 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_simcc-coco_pt-aic-coco_420e-256x192-8edcf0d7_20230127.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_simcc-coco_pt-aic-coco_420e-256x192-8edcf0d7_20230127.json) |
+| [rtmpose-s-humanart-coco](/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-s_8xb256-420e_humanart-256x192.py) | 256x192 | 0.698 | 0.893 | 0.768 | 0.732 | 0.903 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_8xb256-420e_humanart-256x192-5a3ac943_20230611.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_8xb256-420e_humanart-256x192-5a3ac943_20230611.json) |
+| [rtmpose-m-coco](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_coco-256x192.py) | 256x192 | 0.532 | 0.765 | 0.563 | 0.571 | 0.789 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco_pt-aic-coco_420e-256x192-d8dd5ca4_20230127.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco_pt-aic-coco_420e-256x192-d8dd5ca4_20230127.json) |
+| [rtmpose-m-humanart-coco](/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-m_8xb256-420e_humanart-256x192.py) | 256x192 | 0.728 | 0.895 | 0.791 | 0.759 | 0.906 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_8xb256-420e_humanart-256x192-8430627b_20230611.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_8xb256-420e_humanart-256x192-8430627b_20230611.json) |
+| [rtmpose-l-coco](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_coco-256x192.py) | 256x192 | 0.564 | 0.789 | 0.602 | 0.599 | 0.808 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco_pt-aic-coco_420e-256x192-1352a4d2_20230127.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco_pt-aic-coco_420e-256x192-1352a4d2_20230127.json) |
+| [rtmpose-l-humanart-coco](/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-l_8xb256-420e_humanart-256x192.py) | 256x192 | 0.753 | 0.905 | 0.812 | 0.783 | 0.915 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_8xb256-420e_humanart-256x192-389f2cb0_20230611.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_8xb256-420e_humanart-256x192-389f2cb0_20230611.json) |
+
+Results on COCO val2017, using a detector with a human AP of 56.4 on COCO val2017
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [rtmpose-s-coco](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_coco-256x192.py) | 256x192 | 0.716 | 0.892 | 0.789 | 0.768 | 0.929 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_simcc-coco_pt-aic-coco_420e-256x192-8edcf0d7_20230127.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_simcc-coco_pt-aic-coco_420e-256x192-8edcf0d7_20230127.json) |
+| [rtmpose-s-humanart-coco](/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-s_8xb256-420e_humanart-256x192.py) | 256x192 | 0.706 | 0.888 | 0.780 | 0.759 | 0.928 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_8xb256-420e_humanart-256x192-5a3ac943_20230611.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_8xb256-420e_humanart-256x192-5a3ac943_20230611.json) |
+| [rtmpose-m-coco](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_coco-256x192.py) | 256x192 | 0.746 | 0.899 | 0.817 | 0.795 | 0.935 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco_pt-aic-coco_420e-256x192-d8dd5ca4_20230127.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco_pt-aic-coco_420e-256x192-d8dd5ca4_20230127.json) |
+| [rtmpose-m-humanart-coco](/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-m_8xb256-420e_humanart-256x192.py) | 256x192 | 0.725 | 0.892 | 0.795 | 0.775 | 0.929 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_8xb256-420e_humanart-256x192-8430627b_20230611.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_8xb256-420e_humanart-256x192-8430627b_20230611.json) |
+| [rtmpose-l-coco](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_coco-256x192.py) | 256x192 | 0.758 | 0.906 | 0.826 | 0.806 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco_pt-aic-coco_420e-256x192-1352a4d2_20230127.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco_pt-aic-coco_420e-256x192-1352a4d2_20230127.json) |
+| [rtmpose-l-humanart-coco](/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-l_8xb256-420e_humanart-256x192.py) | 256x192 | 0.748 | 0.901 | 0.816 | 0.796 | 0.938 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_8xb256-420e_humanart-256x192-389f2cb0_20230611.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_8xb256-420e_humanart-256x192-389f2cb0_20230611.json) |
+
+Results on COCO val2017 with ground-truth bounding boxes
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [rtmpose-s-humanart-coco](/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-s_8xb256-420e_humanart-256x192.py) | 256x192 | 0.725 | 0.916 | 0.798 | 0.753 | 0.925 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_8xb256-420e_humanart-256x192-5a3ac943_20230611.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_8xb256-420e_humanart-256x192-5a3ac943_20230611.json) |
+| [rtmpose-m-humanart-coco](/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-m_8xb256-420e_humanart-256x192.py) | 256x192 | 0.744 | 0.916 | 0.818 | 0.770 | 0.930 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_8xb256-420e_humanart-256x192-8430627b_20230611.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_8xb256-420e_humanart-256x192-8430627b_20230611.json) |
+| [rtmpose-l-humanart-coco](/configs/body_2d_keypoint/rtmpose/humanart/rtmpose-l_8xb256-420e_humanart-256x192.py) | 256x192 | 0.770 | 0.927 | 0.840 | 0.794 | 0.939 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_8xb256-420e_humanart-256x192-389f2cb0_20230611.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_8xb256-420e_humanart-256x192-389f2cb0_20230611.json) |
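+
+The checkpoints above can be tried with MMPose's high-level inference API. A minimal sketch (the image path is a placeholder; by default the inferencer picks its own person detector, so scores may differ from the detector-specific tables above):
+
+```python
+from mmpose.apis import MMPoseInferencer
+
+# config / weights taken from the rtmpose-l-humanart-coco row above
+inferencer = MMPoseInferencer(
+    pose2d='configs/body_2d_keypoint/rtmpose/humanart/'
+    'rtmpose-l_8xb256-420e_humanart-256x192.py',
+    pose2d_weights='https://download.openmmlab.com/mmpose/v1/projects/'
+    'rtmposev1/rtmpose-l_8xb256-420e_humanart-256x192-389f2cb0_20230611.pth')
+
+# the call returns a generator; each item carries 'predictions'
+# (keypoints and scores) and, optionally, 'visualization'
+result = next(inferencer('path/to/your/image.jpg'))
+```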
diff --git a/configs/body_2d_keypoint/rtmpose/humanart/rtmpose_humanart.yml b/configs/body_2d_keypoint/rtmpose/humanart/rtmpose_humanart.yml
new file mode 100644
index 0000000000..f0f21b2d6f
--- /dev/null
+++ b/configs/body_2d_keypoint/rtmpose/humanart/rtmpose_humanart.yml
@@ -0,0 +1,106 @@
+Collections:
+- Name: RTMPose
+ Paper:
+ Title: "RTMPose: Real-Time Multi-Person Pose Estimation based on MMPose"
+ URL: https://arxiv.org/abs/2303.07399
+ README: https://github.com/open-mmlab/mmpose/blob/main/projects/rtmpose/README.md
+Models:
+- Config: configs/body_2d_keypoint/rtmpose/humanart/rtmpose-l_8xb256-420e_humanart-256x192.py
+ In Collection: RTMPose
+ Metadata:
+ Architecture: &id001
+ - RTMPose
+ Training Data: &id002
+ - COCO
+ - Human-Art
+ Name: rtmpose-l_8xb256-420e_humanart-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.748
+ AP@0.5: 0.901
+ AP@0.75: 0.816
+ AR: 0.796
+ AR@0.5: 0.938
+ Task: Body 2D Keypoint
+ - Dataset: Human-Art
+ Metrics:
+ AP: 0.378
+ AP@0.5: 0.521
+ AP@0.75: 0.399
+ AR: 0.442
+ AR@0.5: 0.584
+ Task: Body 2D Keypoint
+ - Dataset: Human-Art(GT)
+ Metrics:
+ AP: 0.753
+ AP@0.5: 0.905
+ AP@0.75: 0.812
+ AR: 0.783
+ AR@0.5: 0.915
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_8xb256-420e_humanart-256x192-389f2cb0_20230611.pth
+- Config: configs/body_2d_keypoint/rtmpose/humanart/rtmpose-m_8xb256-420e_humanart-256x192.py
+ In Collection: RTMPose
+ Metadata:
+ Architecture: *id001
+ Training Data: *id002
+ Name: rtmpose-m_8xb256-420e_humanart-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.725
+ AP@0.5: 0.892
+ AP@0.75: 0.795
+ AR: 0.775
+ AR@0.5: 0.929
+ Task: Body 2D Keypoint
+ - Dataset: Human-Art
+ Metrics:
+ AP: 0.355
+ AP@0.5: 0.503
+ AP@0.75: 0.377
+ AR: 0.417
+ AR@0.5: 0.568
+ Task: Body 2D Keypoint
+ - Dataset: Human-Art(GT)
+ Metrics:
+ AP: 0.728
+ AP@0.5: 0.895
+ AP@0.75: 0.791
+ AR: 0.759
+ AR@0.5: 0.906
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_8xb256-420e_humanart-256x192-8430627b_20230611.pth
+- Config: configs/body_2d_keypoint/rtmpose/humanart/rtmpose-s_8xb256-420e_humanart-256x192.py
+ In Collection: RTMPose
+ Metadata:
+ Architecture: *id001
+ Training Data: *id002
+ Name: rtmpose-s_8xb256-420e_humanart-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.706
+ AP@0.5: 0.888
+ AP@0.75: 0.780
+ AR: 0.759
+ AR@0.5: 0.928
+ Task: Body 2D Keypoint
+ - Dataset: Human-Art
+ Metrics:
+ AP: 0.311
+ AP@0.5: 0.462
+ AP@0.75: 0.323
+ AR: 0.381
+ AR@0.5: 0.540
+ Task: Body 2D Keypoint
+ - Dataset: Human-Art(GT)
+ Metrics:
+ AP: 0.698
+ AP@0.5: 0.893
+ AP@0.75: 0.768
+ AR: 0.732
+ AR@0.5: 0.903
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_8xb256-420e_humanart-256x192-5a3ac943_20230611.pth
diff --git a/configs/body_2d_keypoint/topdown_heatmap/README.md b/configs/body_2d_keypoint/topdown_heatmap/README.md
index 9e23b874bc..47aae219e4 100644
--- a/configs/body_2d_keypoint/topdown_heatmap/README.md
+++ b/configs/body_2d_keypoint/topdown_heatmap/README.md
@@ -115,3 +115,19 @@ Results on PoseTrack2018 val with ground-truth bounding boxes.
| HRNet-w48 | 256x192 | 84.6 | [hrnet_posetrack18.md](./posetrack18/hrnet_posetrack18.md) |
| HRNet-w32 | 256x192 | 83.4 | [hrnet_posetrack18.md](./posetrack18/hrnet_posetrack18.md) |
| ResNet-50 | 256x192 | 81.2 | [resnet_posetrack18.md](./posetrack18/resnet_posetrack18.md) |
+
+### Human-Art Dataset
+
+Results on the Human-Art validation set, using a detector with a human AP of 56.2 on the same set
+
+| Model | Input Size | AP | AR | Details and Download |
+| :-------: | :--------: | :---: | :---: | :---------------------------------------------------: |
+| ViTPose-s | 256x192 | 0.381 | 0.448 | [vitpose_humanart.md](./humanart/vitpose_humanart.md) |
+| ViTPose-b | 256x192 | 0.410 | 0.475 | [vitpose_humanart.md](./humanart/vitpose_humanart.md) |
+
+Results on the Human-Art validation set with ground-truth bounding boxes
+
+| Model | Input Size | AP | AR | Details and Download |
+| :-------: | :--------: | :---: | :---: | :---------------------------------------------------: |
+| ViTPose-s | 256x192 | 0.738 | 0.768 | [vitpose_humanart.md](./humanart/vitpose_humanart.md) |
+| ViTPose-b | 256x192 | 0.759 | 0.790 | [vitpose_humanart.md](./humanart/vitpose_humanart.md) |
diff --git a/configs/body_2d_keypoint/topdown_heatmap/humanart/td-hm_ViTPose-base_8xb64-210e_humanart-256x192.py b/configs/body_2d_keypoint/topdown_heatmap/humanart/td-hm_ViTPose-base_8xb64-210e_humanart-256x192.py
new file mode 100644
index 0000000000..6f08f404fb
--- /dev/null
+++ b/configs/body_2d_keypoint/topdown_heatmap/humanart/td-hm_ViTPose-base_8xb64-210e_humanart-256x192.py
@@ -0,0 +1,150 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+custom_imports = dict(
+ imports=['mmpose.engine.optim_wrappers.layer_decay_optim_wrapper'],
+ allow_failed_imports=False)
+
+optim_wrapper = dict(
+ optimizer=dict(
+ type='AdamW', lr=5e-4, betas=(0.9, 0.999), weight_decay=0.1),
+ paramwise_cfg=dict(
+ num_layers=12,
+ layer_decay_rate=0.75,
+ custom_keys={
+            'bias': dict(decay_mult=0.0),
+ 'pos_embed': dict(decay_mult=0.0),
+ 'relative_position_bias_table': dict(decay_mult=0.0),
+ 'norm': dict(decay_mult=0.0),
+ },
+ ),
+ constructor='LayerDecayOptimWrapperConstructor',
+ clip_grad=dict(max_norm=1., norm_type=2),
+)
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+
+# codec settings
+codec = dict(
+ type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='mmcls.VisionTransformer',
+ arch='base',
+ img_size=(256, 192),
+ patch_size=16,
+ qkv_bias=True,
+ drop_path_rate=0.3,
+ with_cls_token=False,
+ output_cls_token=False,
+ patch_cfg=dict(padding=2),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'v1/pretrained_models/mae_pretrain_vit_base.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=768,
+ out_channels=17,
+ deconv_out_channels=(256, 256),
+ deconv_kernel_sizes=(4, 4),
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=False,
+ ))
+
+# base dataset settings
+data_root = 'data/'
+dataset_type = 'HumanArtDataset'
+data_mode = 'topdown'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=4,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='HumanArt/annotations/training_humanart_coco.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=4,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='HumanArt/annotations/validation_humanart.json',
+ bbox_file=f'{data_root}HumanArt/person_detection_results/'
+ 'HumanArt_validation_detections_AP_H_56_person.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'HumanArt/annotations/validation_humanart.json')
+test_evaluator = val_evaluator
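+
+# Note: unlike the RTMPose Human-Art configs above, `bbox_file` is active in
+# `val_dataloader`, so validation uses the pre-computed detector boxes
+# (human AP 56.2 on the Human-Art validation set). Commenting `bbox_file`
+# out evaluates with ground-truth boxes instead, matching the second table
+# in vitpose_humanart.md.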
diff --git a/configs/body_2d_keypoint/topdown_heatmap/humanart/td-hm_ViTPose-small_8xb64-210e_humanart-256x192.py b/configs/body_2d_keypoint/topdown_heatmap/humanart/td-hm_ViTPose-small_8xb64-210e_humanart-256x192.py
new file mode 100644
index 0000000000..6daf87cc90
--- /dev/null
+++ b/configs/body_2d_keypoint/topdown_heatmap/humanart/td-hm_ViTPose-small_8xb64-210e_humanart-256x192.py
@@ -0,0 +1,155 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+custom_imports = dict(
+ imports=['mmpose.engine.optim_wrappers.layer_decay_optim_wrapper'],
+ allow_failed_imports=False)
+
+optim_wrapper = dict(
+ optimizer=dict(
+ type='AdamW', lr=5e-4, betas=(0.9, 0.999), weight_decay=0.1),
+ paramwise_cfg=dict(
+ num_layers=12,
+ layer_decay_rate=0.8,
+ custom_keys={
+            'bias': dict(decay_mult=0.0),
+ 'pos_embed': dict(decay_mult=0.0),
+ 'relative_position_bias_table': dict(decay_mult=0.0),
+ 'norm': dict(decay_mult=0.0),
+ },
+ ),
+ constructor='LayerDecayOptimWrapperConstructor',
+ clip_grad=dict(max_norm=1., norm_type=2),
+)
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+
+# codec settings
+codec = dict(
+ type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='mmcls.VisionTransformer',
+ arch={
+ 'embed_dims': 384,
+ 'num_layers': 12,
+ 'num_heads': 12,
+ 'feedforward_channels': 384 * 4
+ },
+ img_size=(256, 192),
+ patch_size=16,
+ qkv_bias=True,
+ drop_path_rate=0.1,
+ with_cls_token=False,
+ output_cls_token=False,
+ patch_cfg=dict(padding=2),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'v1/pretrained_models/mae_pretrain_vit_small.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=384,
+ out_channels=17,
+ deconv_out_channels=(256, 256),
+ deconv_kernel_sizes=(4, 4),
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=False,
+ ))
+
+# base dataset settings
+data_root = 'data/'
+dataset_type = 'HumanArtDataset'
+data_mode = 'topdown'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=4,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='HumanArt/annotations/training_humanart_coco.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=4,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='HumanArt/annotations/validation_humanart.json',
+ bbox_file=f'{data_root}HumanArt/person_detection_results/'
+ 'HumanArt_validation_detections_AP_H_56_person.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'HumanArt/annotations/validation_humanart.json')
+test_evaluator = val_evaluator
diff --git a/configs/body_2d_keypoint/topdown_heatmap/humanart/vitpose_humanart.md b/configs/body_2d_keypoint/topdown_heatmap/humanart/vitpose_humanart.md
new file mode 100644
index 0000000000..1e559aa4da
--- /dev/null
+++ b/configs/body_2d_keypoint/topdown_heatmap/humanart/vitpose_humanart.md
@@ -0,0 +1,85 @@
+To use ViTPose, you'll need [MMClassification](https://github.com/open-mmlab/mmclassification) installed. To install the required version, run the following command:
+
+```shell
+mim install 'mmcls>=1.0.0rc5'
+```
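+
+A quick sanity check of the installation (a sketch; `mmcls` is the import name of MMClassification):
+
+```python
+import mmcls
+
+# should print 1.0.0rc5 or later
+print(mmcls.__version__)
+```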
+
+
+
+
+
+ViTPose (NeurIPS'2022)
+
+```bibtex
+@inproceedings{
+ xu2022vitpose,
+ title={Vi{TP}ose: Simple Vision Transformer Baselines for Human Pose Estimation},
+ author={Yufei Xu and Jing Zhang and Qiming Zhang and Dacheng Tao},
+ booktitle={Advances in Neural Information Processing Systems},
+ year={2022},
+}
+```
+
+
+
+
+
+
+COCO-WholeBody (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+
+Human-Art (CVPR'2023)
+
+```bibtex
+@inproceedings{ju2023humanart,
+ title={Human-Art: A Versatile Human-Centric Dataset Bridging Natural and Artificial Scenes},
+  author={Ju, Xuan and Zeng, Ailing and Wang, Jianan and Xu, Qiang and Zhang, Lei},
+  booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+  year={2023}
+}
+```
+
+
+
+Results on Human-Art validation dataset with detector having human AP of 56.2 on Human-Art validation dataset
+
+> With classic decoder
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [ViTPose-S-coco](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small_8xb64-210e_coco-256x192.py) | 256x192 | 0.228 | 0.371 | 0.229 | 0.298 | 0.467 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small_8xb64-210e_coco-256x192-62d7a712_20230314.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small_8xb64-210e_coco-256x192-62d7a712_20230314.json) |
+| [ViTPose-S-humanart-coco](/configs/body_2d_keypoint/topdown_heatmap/humanart/td-hm_ViTPose-small_8xb64-210e_humanart-256x192.py) | 256x192 | 0.381 | 0.532 | 0.405 | 0.448 | 0.602 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/human_art/td-hm_ViTPose-small_8xb64-210e_humanart-256x192-5cbe2bfc_20230611.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/human_art/td-hm_ViTPose-small_8xb64-210e_humanart-256x192-5cbe2bfc_20230611.json) |
+| [ViTPose-B-coco](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base_8xb64-210e_coco-256x192.py) | 256x192 | 0.270 | 0.423 | 0.272 | 0.340 | 0.510 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base_8xb64-210e_coco-256x192-216eae50_20230314.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base_8xb64-210e_coco-256x192-216eae50_20230314.json) |
+| [ViTPose-B-humanart-coco](/configs/body_2d_keypoint/topdown_heatmap/humanart/td-hm_ViTPose-base_8xb64-210e_humanart-256x192.py) | 256x192 | 0.410 | 0.549 | 0.434 | 0.475 | 0.615 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/human_art/td-hm_ViTPose-base_8xb64-210e_humanart-256x192-b417f546_20230611.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/human_art/td-hm_ViTPose-base_8xb64-210e_humanart-256x192-b417f546_20230611.json) |
+
+Results on Human-Art validation dataset with ground-truth bounding boxes
+
+> With classic decoder
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [ViTPose-S-coco](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small_8xb64-210e_coco-256x192.py) | 256x192 | 0.507 | 0.758 | 0.531 | 0.551 | 0.780 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small_8xb64-210e_coco-256x192-62d7a712_20230314.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small_8xb64-210e_coco-256x192-62d7a712_20230314.json) |
+| [ViTPose-S-humanart-coco](/configs/body_2d_keypoint/topdown_heatmap/humanart/td-hm_ViTPose-small_8xb64-210e_humanart-256x192.py) | 256x192 | 0.738 | 0.905 | 0.802 | 0.768 | 0.911 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/human_art/td-hm_ViTPose-small_8xb64-210e_humanart-256x192-5cbe2bfc_20230611.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/human_art/td-hm_ViTPose-small_8xb64-210e_humanart-256x192-5cbe2bfc_20230611.json) |
+| [ViTPose-B-coco](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base_8xb64-210e_coco-256x192.py) | 256x192 | 0.555 | 0.782 | 0.590 | 0.599 | 0.809 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base_8xb64-210e_coco-256x192-216eae50_20230314.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base_8xb64-210e_coco-256x192-216eae50_20230314.json) |
+| [ViTPose-B-humanart-coco](/configs/body_2d_keypoint/topdown_heatmap/humanart/td-hm_ViTPose-base_8xb64-210e_humanart-256x192.py) | 256x192 | 0.759 | 0.905 | 0.823 | 0.790 | 0.917 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/human_art/td-hm_ViTPose-base_8xb64-210e_humanart-256x192-b417f546_20230611.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/human_art/td-hm_ViTPose-base_8xb64-210e_humanart-256x192-b417f546_20230611.json) |
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+> With classic decoder
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [ViTPose-S-coco](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small_8xb64-210e_coco-256x192.py) | 256x192 | 0.739 | 0.903 | 0.816 | 0.792 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small_8xb64-210e_coco-256x192-62d7a712_20230314.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small_8xb64-210e_coco-256x192-62d7a712_20230314.json) |
+| [ViTPose-S-humanart-coco](/configs/body_2d_keypoint/topdown_heatmap/humanart/td-hm_ViTPose-small_8xb64-210e_humanart-256x192.py) | 256x192 | 0.737 | 0.902 | 0.811 | 0.792 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/human_art/td-hm_ViTPose-small_8xb64-210e_humanart-256x192-5cbe2bfc_20230611.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/human_art/td-hm_ViTPose-small_8xb64-210e_humanart-256x192-5cbe2bfc_20230611.json) |
+| [ViTPose-B-coco](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base_8xb64-210e_coco-256x192.py) | 256x192 | 0.757 | 0.905 | 0.829 | 0.810 | 0.946 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base_8xb64-210e_coco-256x192-216eae50_20230314.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base_8xb64-210e_coco-256x192-216eae50_20230314.json) |
+| [ViTPose-B-humanart-coco](/configs/body_2d_keypoint/topdown_heatmap/humanart/td-hm_ViTPose-base_8xb64-210e_humanart-256x192.py) | 256x192 | 0.758 | 0.906 | 0.829 | 0.812 | 0.946 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/human_art/td-hm_ViTPose-base_8xb64-210e_humanart-256x192-b417f546_20230611.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/human_art/td-hm_ViTPose-base_8xb64-210e_humanart-256x192-b417f546_20230611.json) |
diff --git a/configs/body_2d_keypoint/topdown_heatmap/humanart/vitpose_humanart.yml b/configs/body_2d_keypoint/topdown_heatmap/humanart/vitpose_humanart.yml
new file mode 100644
index 0000000000..12a557fbf6
--- /dev/null
+++ b/configs/body_2d_keypoint/topdown_heatmap/humanart/vitpose_humanart.yml
@@ -0,0 +1,79 @@
+Collections:
+- Name: ViTPose
+ Paper:
+ Title: 'ViTPose: Simple Vision Transformer Baselines for Human Pose Estimation'
+ URL: https://arxiv.org/abs/2204.12484
+ README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/algorithms/vitpose.md
+ Metadata:
+ Training Resources: 8x A100 GPUs
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/humanart/td-hm_ViTPose-small_8xb64-210e_humanart-256x192.py
+ In Collection: ViTPose
+ Metadata:
+ Architecture: &id001
+ - ViTPose
+ - Classic Head
+ Model Size: Small
+ Training Data: &id002
+ - COCO
+ - Human-Art
+ Name: td-hm_ViTPose-small_8xb64-210e_humanart-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.737
+ AP@0.5: 0.902
+ AP@0.75: 0.811
+ AR: 0.792
+ AR@0.5: 0.942
+ Task: Body 2D Keypoint
+ - Dataset: Human-Art
+ Metrics:
+ AP: 0.381
+ AP@0.5: 0.532
+ AP@0.75: 0.405
+ AR: 0.448
+ AR@0.5: 0.602
+ Task: Body 2D Keypoint
+ - Dataset: Human-Art(GT)
+ Metrics:
+ AP: 0.738
+ AP@0.5: 0.905
+ AP@0.75: 0.802
+ AR: 0.768
+ AR@0.5: 0.911
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/human_art/td-hm_ViTPose-small_8xb64-210e_humanart-256x192-5cbe2bfc_20230611.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/humanart/td-hm_ViTPose-base_8xb64-210e_humanart-256x192.py
+ In Collection: ViTPose
+ Metadata:
+ Architecture: *id001
+ Model Size: Base
+ Training Data: *id002
+ Name: td-hm_ViTPose-base_8xb64-210e_humanart-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.758
+ AP@0.5: 0.906
+ AP@0.75: 0.829
+ AR: 0.812
+ AR@0.5: 0.946
+ Task: Body 2D Keypoint
+ - Dataset: Human-Art
+ Metrics:
+ AP: 0.410
+ AP@0.5: 0.549
+ AP@0.75: 0.434
+ AR: 0.475
+ AR@0.5: 0.615
+ Task: Body 2D Keypoint
+ - Dataset: Human-Art(GT)
+ Metrics:
+ AP: 0.759
+ AP@0.5: 0.905
+ AP@0.75: 0.823
+ AR: 0.790
+ AR@0.5: 0.917
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/human_art/td-hm_ViTPose-base_8xb64-210e_humanart-256x192-b417f546_20230611.pth
diff --git a/docs/en/dataset_zoo/2d_body_keypoint.md b/docs/en/dataset_zoo/2d_body_keypoint.md
index c5bf70a3f8..4448ebe8f4 100644
--- a/docs/en/dataset_zoo/2d_body_keypoint.md
+++ b/docs/en/dataset_zoo/2d_body_keypoint.md
@@ -13,6 +13,7 @@ MMPose supported datasets:
- [CrowdPose](#crowdpose) \[ [Homepage](https://github.com/Jeff-sjtu/CrowdPose) \]
- [OCHuman](#ochuman) \[ [Homepage](https://github.com/liruilong940607/OCHumanApi) \]
- [MHP](#mhp) \[ [Homepage](https://lv-mhp.github.io/dataset) \]
+ - [Human-Art](#humanart) \[ [Homepage](https://idea-research.github.io/HumanArt/) \]
- Videos
- [PoseTrack18](#posetrack18) \[ [Homepage](https://posetrack.net/users/download.php) \]
- [sub-JHMDB](#sub-jhmdb-dataset) \[ [Homepage](http://jhmdb.is.tue.mpg.de/dataset) \]
@@ -386,6 +387,57 @@ mmpose
│   │   │-- ...
```
+## Human-Art dataset
+
+
+
+
+Human-Art (CVPR'2023)
+
+```bibtex
+@inproceedings{ju2023humanart,
+ title={Human-Art: A Versatile Human-Centric Dataset Bridging Natural and Artificial Scenes},
+  author={Ju, Xuan and Zeng, Ailing and Wang, Jianan and Xu, Qiang and Zhang, Lei},
+  booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+  year={2023}
+}
+```
+
+
+
+
+

+
+
+For [Human-Art](https://idea-research.github.io/HumanArt/) data, please download the images and annotation files from [its website](https://idea-research.github.io/HumanArt/). You need to fill in the [data form](https://docs.google.com/forms/d/e/1FAIpQLScroT_jvw6B9U2Qca1_cl5Kmmu1ceKtlh6DJNmWLte8xNEhEw/viewform) to get access to the data.
+Move them under $MMPOSE/data, and organize them as follows:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+├── data
+ │── HumanArt
+ │-- images
+ │ │-- 2D_virtual_human
+ │ │ |-- cartoon
+ │ │ | |-- 000000000000.jpg
+ │ │ | |-- ...
+ │ │ |-- digital_art
+ │ │ |-- ...
+ │ |-- 3D_virtual_human
+ │ |-- real_human
+ |-- annotations
+ │ │-- validation_humanart.json
+ │ │-- training_humanart_coco.json
+ |-- person_detection_results
+ │ │-- HumanArt_validation_detections_AP_H_56_person.json
+```
+
+Downloading the other annotation files of Human-Art is optional. If you want to use additional annotation files (e.g. the validation set of the cartoon category), you need to edit the corresponding annotation paths in the config file, as sketched below.
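+
+The following minimal sketch shows such an override in a config that inherits the Human-Art config (the per-category annotation file name is hypothetical; use the name of the file you actually downloaded):
+
+```python
+# hypothetical override of the validation annotations in an inheriting config
+data_root = 'data/'
+val_dataloader = dict(
+    dataset=dict(
+        ann_file='HumanArt/annotations/validation_humanart_cartoon.json'))
+val_evaluator = dict(
+    ann_file=data_root + 'HumanArt/annotations/validation_humanart_cartoon.json')
+test_dataloader = val_dataloader
+test_evaluator = val_evaluator
+```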
+
## PoseTrack18
diff --git a/docs/zh_cn/dataset_zoo/2d_body_keypoint.md b/docs/zh_cn/dataset_zoo/2d_body_keypoint.md
index c5bf70a3f8..4448ebe8f4 100644
--- a/docs/zh_cn/dataset_zoo/2d_body_keypoint.md
+++ b/docs/zh_cn/dataset_zoo/2d_body_keypoint.md
@@ -13,6 +13,7 @@ MMPose supported datasets:
- [CrowdPose](#crowdpose) \[ [Homepage](https://github.com/Jeff-sjtu/CrowdPose) \]
- [OCHuman](#ochuman) \[ [Homepage](https://github.com/liruilong940607/OCHumanApi) \]
- [MHP](#mhp) \[ [Homepage](https://lv-mhp.github.io/dataset) \]
+ - [Human-Art](#humanart) \[ [Homepage](https://idea-research.github.io/HumanArt/) \]
- Videos
- [PoseTrack18](#posetrack18) \[ [Homepage](https://posetrack.net/users/download.php) \]
- [sub-JHMDB](#sub-jhmdb-dataset) \[ [Homepage](http://jhmdb.is.tue.mpg.de/dataset) \]
@@ -386,6 +387,57 @@ mmpose
│   │   │-- ...
```
+## Human-Art dataset
+
+
+
+
+Human-Art (CVPR'2023)
+
+```bibtex
+@inproceedings{ju2023humanart,
+ title={Human-Art: A Versatile Human-Centric Dataset Bridging Natural and Artificial Scenes},
+  author={Ju, Xuan and Zeng, Ailing and Wang, Jianan and Xu, Qiang and Zhang, Lei},
+  booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+  year={2023}
+}
+```
+
+
+
+
+

+
+
+For [Human-Art](https://idea-research.github.io/HumanArt/) data, please download the images and annotation files from [its website](https://idea-research.github.io/HumanArt/). You need to fill in the [data form](https://docs.google.com/forms/d/e/1FAIpQLScroT_jvw6B9U2Qca1_cl5Kmmu1ceKtlh6DJNmWLte8xNEhEw/viewform) to get access to the data.
+Move them under $MMPOSE/data, and organize them as follows:
+
+```text
+mmpose
+├── mmpose
+├── docs
+├── tests
+├── tools
+├── configs
+├── data
+ │── HumanArt
+ │-- images
+ │ │-- 2D_virtual_human
+ │ │ |-- cartoon
+ │ │ | |-- 000000000000.jpg
+ │ │ | |-- ...
+ │ │ |-- digital_art
+ │ │ |-- ...
+ │ |-- 3D_virtual_human
+ │ |-- real_human
+ |-- annotations
+ │ │-- validation_humanart.json
+ │ │-- training_humanart_coco.json
+ |-- person_detection_results
+ │ │-- HumanArt_validation_detections_AP_H_56_person.json
+```
+
+Downloading the other annotation files of Human-Art is optional. If you want to use additional annotation files (e.g. the validation set of the cartoon category), you need to edit the corresponding annotation paths in the config file, as sketched below.
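+
+The following minimal sketch shows such an override in a config that inherits the Human-Art config (the per-category annotation file name is hypothetical; use the name of the file you actually downloaded):
+
+```python
+# hypothetical override of the validation annotations in an inheriting config
+data_root = 'data/'
+val_dataloader = dict(
+    dataset=dict(
+        ann_file='HumanArt/annotations/validation_humanart_cartoon.json'))
+val_evaluator = dict(
+    ann_file=data_root + 'HumanArt/annotations/validation_humanart_cartoon.json')
+test_dataloader = val_dataloader
+test_evaluator = val_evaluator
+```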
+
## PoseTrack18
diff --git a/mmpose/datasets/datasets/body/__init__.py b/mmpose/datasets/datasets/body/__init__.py
index a4aeef8519..1405b0d675 100644
--- a/mmpose/datasets/datasets/body/__init__.py
+++ b/mmpose/datasets/datasets/body/__init__.py
@@ -2,6 +2,7 @@
from .aic_dataset import AicDataset
from .coco_dataset import CocoDataset
from .crowdpose_dataset import CrowdPoseDataset
+from .humanart_dataset import HumanArtDataset
from .jhmdb_dataset import JhmdbDataset
from .mhp_dataset import MhpDataset
from .mpii_dataset import MpiiDataset
@@ -13,5 +14,5 @@
__all__ = [
'CocoDataset', 'MpiiDataset', 'MpiiTrbDataset', 'AicDataset',
'CrowdPoseDataset', 'OCHumanDataset', 'MhpDataset', 'PoseTrack18Dataset',
- 'JhmdbDataset', 'PoseTrack18VideoDataset'
+ 'JhmdbDataset', 'PoseTrack18VideoDataset', 'HumanArtDataset'
]
diff --git a/mmpose/datasets/datasets/body/humanart_dataset.py b/mmpose/datasets/datasets/body/humanart_dataset.py
new file mode 100644
index 0000000000..719f35fc9e
--- /dev/null
+++ b/mmpose/datasets/datasets/body/humanart_dataset.py
@@ -0,0 +1,73 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmpose.registry import DATASETS
+from ..base import BaseCocoStyleDataset
+
+
+@DATASETS.register_module()
+class HumanArtDataset(BaseCocoStyleDataset):
+ """Human-Art dataset for pose estimation.
+
+ "Human-Art: A Versatile Human-Centric Dataset
+ Bridging Natural and Artificial Scenes", CVPR'2023.
+    More details can be found in the `paper
+    <https://idea-research.github.io/HumanArt/>`__ .
+
+ Human-Art keypoints::
+
+ 0: 'nose',
+ 1: 'left_eye',
+ 2: 'right_eye',
+ 3: 'left_ear',
+ 4: 'right_ear',
+ 5: 'left_shoulder',
+ 6: 'right_shoulder',
+ 7: 'left_elbow',
+ 8: 'right_elbow',
+ 9: 'left_wrist',
+ 10: 'right_wrist',
+ 11: 'left_hip',
+ 12: 'right_hip',
+ 13: 'left_knee',
+ 14: 'right_knee',
+ 15: 'left_ankle',
+ 16: 'right_ankle'
+
+ Args:
+ ann_file (str): Annotation file path. Default: ''.
+ bbox_file (str, optional): Detection result file path. If
+ ``bbox_file`` is set, detected bboxes loaded from this file will
+ be used instead of ground-truth bboxes. This setting is only for
+ evaluation, i.e., ignored when ``test_mode`` is ``False``.
+ Default: ``None``.
+ data_mode (str): Specifies the mode of data samples: ``'topdown'`` or
+ ``'bottomup'``. In ``'topdown'`` mode, each data sample contains
+ one instance; while in ``'bottomup'`` mode, each data sample
+            contains all instances in an image. Default: ``'topdown'``
+ metainfo (dict, optional): Meta information for dataset, such as class
+ information. Default: ``None``.
+ data_root (str, optional): The root directory for ``data_prefix`` and
+ ``ann_file``. Default: ``None``.
+ data_prefix (dict, optional): Prefix for training data. Default:
+ ``dict(img=None, ann=None)``.
+        filter_cfg (dict, optional): Config for filtering data. Default:
+            ``None``.
+        indices (int or Sequence[int], optional): Support using only the
+            first few samples in the annotation file to facilitate
+            training/testing on a smaller dataset. Default: ``None``, which
+            means using all ``data_infos``.
+        serialize_data (bool, optional): Whether to hold memory using
+            serialized objects. When enabled, data loader workers can use
+            shared RAM from the master process instead of making a copy.
+            Default: ``True``.
+ pipeline (list, optional): Processing pipeline. Default: [].
+ test_mode (bool, optional): ``test_mode=True`` means in test phase.
+ Default: ``False``.
+        lazy_init (bool, optional): Whether to defer loading annotations
+            until they are actually needed. In some cases, such as
+            visualization, only the meta information of the dataset is
+            required, so the annotation file does not need to be loaded.
+            ``BaseDataset`` can skip loading annotations to save time by
+            setting ``lazy_init=True``. Default: ``False``.
+        max_refetch (int, optional): The maximum number of extra cycles to
+            fetch a valid image if ``BaseDataset.prepare_data`` gets a
+            ``None`` image. Default: 1000.
+ """
+
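+    # keypoint names, skeleton links and flip pairs are loaded from the
+    # base dataset config added in this PR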
+ METAINFO: dict = dict(from_file='configs/_base_/datasets/humanart.py')
diff --git a/tests/data/humanart/2D_virtual_human/digital_art/000000001648.jpg b/tests/data/humanart/2D_virtual_human/digital_art/000000001648.jpg
new file mode 100644
index 0000000000..8f2202760b
Binary files /dev/null and b/tests/data/humanart/2D_virtual_human/digital_art/000000001648.jpg differ
diff --git a/tests/data/humanart/3D_virtual_human/garage_kits/000000005603.jpg b/tests/data/humanart/3D_virtual_human/garage_kits/000000005603.jpg
new file mode 100644
index 0000000000..21f551c324
Binary files /dev/null and b/tests/data/humanart/3D_virtual_human/garage_kits/000000005603.jpg differ
diff --git a/tests/data/humanart/real_human/acrobatics/000000000590.jpg b/tests/data/humanart/real_human/acrobatics/000000000590.jpg
new file mode 100644
index 0000000000..15efbec533
Binary files /dev/null and b/tests/data/humanart/real_human/acrobatics/000000000590.jpg differ
diff --git a/tests/data/humanart/test_humanart.json b/tests/data/humanart/test_humanart.json
new file mode 100644
index 0000000000..8cf13e3530
--- /dev/null
+++ b/tests/data/humanart/test_humanart.json
@@ -0,0 +1,716 @@
+{
+ "info": {
+ "description": "For testing Human-Art dataset only.",
+ "year": 2023,
+ "date_created": "2023/06/12"
+ },
+ "images": [
+ {
+ "file_name": "HumanArt/images/2D_virtual_human/digital_art/000000001648.jpg",
+ "height": 1750,
+ "width": 1280,
+ "id": 2000000001648,
+ "page_url": "https://www.deviantart.com/endemilk/art/Autumn-Mood-857953165",
+ "image_url": "https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/f/cef0f0b2-832e-4f53-95c6-32f822f796ac/de6swwd-8ae0bba7-f879-43db-9f34-33d067ea3683.png/v1/fill/w_1280,h_1750,q_80,strp/autumn_mood_by_endemilk_de6swwd-fullview.jpg?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJ1cm46YXBwOjdlMGQxODg5ODIyNjQzNzNhNWYwZDQxNWVhMGQyNmUwIiwiaXNzIjoidXJuOmFwcDo3ZTBkMTg4OTgyMjY0MzczYTVmMGQ0MTVlYTBkMjZlMCIsIm9iaiI6W1t7ImhlaWdodCI6Ijw9MTc1MCIsInBhdGgiOiJcL2ZcL2NlZjBmMGIyLTgzMmUtNGY1My05NWM2LTMyZjgyMmY3OTZhY1wvZGU2c3d3ZC04YWUwYmJhNy1mODc5LTQzZGItOWYzNC0zM2QwNjdlYTM2ODMucG5nIiwid2lkdGgiOiI8PTEyODAifV1dLCJhdWQiOlsidXJuOnNlcnZpY2U6aW1hZ2Uub3BlcmF0aW9ucyJdfQ.u2McWPeJ1MDJokGkVa3qnJlYJFoldamHt9B6rGtSf9Y",
+ "picture_name": "Autumn Mood",
+ "author": "Endemilk",
+ "description": "digital_art, a girl in a white dress standing under a tree with autumn leaves",
+ "category": "digital art"
+ },
+ {
+ "file_name": "HumanArt/images/3D_virtual_human/garage_kits/000000005603.jpg",
+ "height": 600,
+ "width": 700,
+ "id": 12000000005603,
+ "page_url": "https://www.goodsmile.info/ja/product/6010/%E3%81%AD%E3%82%93%E3%81%A9%E3%82%8D%E3%81%84%E3%81%A9+%E3%82%A8%E3%83%83%E3%82%AF%E3%82%B9+%E3%83%95%E3%83%AB%E3%82%A2%E3%83%BC%E3%83%9E%E3%83%BC.html",
+ "image_url": "https://images.goodsmile.info/cgm/images/product/20161014/6010/41809/large/7b2d02a6a8a8d89af3a34f70942fdcc7.jpg",
+ "picture_name": "Irregular hunter who wants peace",
+ "author": "None",
+ "description": "garage_kits, a figurine of a character holding a gun",
+ "category": "garage kits"
+ },
+ {
+ "file_name": "HumanArt/images/real_human/acrobatics/000000000590.jpg",
+ "height": 612,
+ "width": 589,
+ "id": 15000000000590,
+ "page_url": "https://www.istockphoto.com/hk/search/2/image?phrase=acrobatics&page=",
+ "image_url": "https://media.istockphoto.com/photos/women-couple-of-dancers-acrobats-picture-id494524123?k=20&m=494524123&s=612x612&w=0&h=Mt-1N5a2aCS3n6spX_Fw8JRmf3zAO2VnvB4T0mGCN4s=",
+ "picture_name": "None",
+ "author": "None",
+ "description": "acrobatics, two women in green and white performing acrobatics",
+ "category": "acrobatics"
+ }
+ ],
+ "annotations": [
+ {
+ "keypoints": [
+ 715.4305,
+ 970.0197,
+ 2,
+ 698.8416,
+ 942.6802,
+ 2,
+ 679.6984,
+ 941.7231,
+ 2,
+ 644.7338,
+ 948.259,
+ 2,
+ 611.9386,
+ 946.367,
+ 2,
+ 518.0118,
+ 1122.8295,
+ 2,
+ 656.3654,
+ 1106.6009,
+ 2,
+ 529.2618,
+ 1364.4753,
+ 2,
+ 589.2787,
+ 1375.8299,
+ 2,
+ 687.9009,
+ 1377.9864,
+ 2,
+ 744.6238,
+ 1409.0027,
+ 2,
+ 557.0198,
+ 1505.5454,
+ 2,
+ 680.6947,
+ 1499.8197,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+ ],
+ "keypoints_21": [
+ 715.4305,
+ 970.0197,
+ 2,
+ 698.8416,
+ 942.6802,
+ 2,
+ 679.6984,
+ 941.7231,
+ 2,
+ 644.7338,
+ 948.259,
+ 2,
+ 611.9386,
+ 946.367,
+ 2,
+ 518.0118,
+ 1122.8295,
+ 2,
+ 656.3654,
+ 1106.6009,
+ 2,
+ 529.2618,
+ 1364.4753,
+ 2,
+ 589.2787,
+ 1375.8299,
+ 2,
+ 687.9009,
+ 1377.9864,
+ 2,
+ 744.6238,
+ 1409.0027,
+ 2,
+ 557.0198,
+ 1505.5454,
+ 2,
+ 680.6947,
+ 1499.8197,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 711.6695,
+ 1391.3213,
+ 2,
+ 764.9766,
+ 1420.8272,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+ ],
+ "self_contact": [],
+ "num_keypoints": 13,
+ "num_keypoints_21": 15,
+ "iscrowd": 0,
+ "image_id": 2000000001648,
+ "area": 288736.90076053096,
+ "bbox": [
+ 468.61884,
+ 828.9586400000001,
+ 355.629312,
+ 811.9041119999999
+ ],
+ "category_id": 1,
+ "id": 2000000006746,
+ "annotator": 67037
+ },
+ {
+ "keypoints": [
+ 313.972,
+ 252.666,
+ 2,
+ 333.8015,
+ 228.7117,
+ 2,
+ 272.5658,
+ 207.7711,
+ 2,
+ 342.3681,
+ 227.4426,
+ 2,
+ 200.6833,
+ 204.2117,
+ 2,
+ 0,
+ 0,
+ 0,
+ 251.3643,
+ 302.7895,
+ 2,
+ 0,
+ 0,
+ 0,
+ 275.9871,
+ 312.5822,
+ 2,
+ 0,
+ 0,
+ 0,
+ 292.1347,
+ 313.8643,
+ 2,
+ 304.7952,
+ 403.3614,
+ 2,
+ 286.269,
+ 402.473,
+ 2,
+ 330.7358,
+ 441.4618,
+ 2,
+ 260.2096,
+ 441.0565,
+ 2,
+ 321.9826,
+ 495.339,
+ 2,
+ 222.4324,
+ 493.9369,
+ 2
+ ],
+ "keypoints_21": [
+ 313.972,
+ 252.666,
+ 2,
+ 333.8015,
+ 228.7117,
+ 2,
+ 272.5658,
+ 207.7711,
+ 2,
+ 342.3681,
+ 227.4426,
+ 2,
+ 200.6833,
+ 204.2117,
+ 2,
+ 0,
+ 0,
+ 0,
+ 251.3643,
+ 302.7895,
+ 2,
+ 0,
+ 0,
+ 0,
+ 275.9871,
+ 312.5822,
+ 2,
+ 0,
+ 0,
+ 0,
+ 292.1347,
+ 313.8643,
+ 2,
+ 304.7952,
+ 403.3614,
+ 2,
+ 286.269,
+ 402.473,
+ 2,
+ 330.7358,
+ 441.4618,
+ 2,
+ 260.2096,
+ 441.0565,
+ 2,
+ 321.9826,
+ 495.339,
+ 2,
+ 222.4324,
+ 493.9369,
+ 2,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 398.5162,
+ 556.9002,
+ 2,
+ 212.5182,
+ 563.4001,
+ 2
+ ],
+ "self_contact": [],
+ "num_keypoints": 14,
+ "num_keypoints_21": 16,
+ "iscrowd": 0,
+ "image_id": 12000000005603,
+ "area": 132932.1180077885,
+ "bbox": [
+ 161.11672,
+ 132.37402000000003,
+ 284.87937600000004,
+ 466.62597999999997
+ ],
+ "category_id": 1,
+ "id": 12000000076660,
+ "annotator": 66991
+ },
+ {
+ "keypoints": [
+ 319.2161,
+ 546.3765,
+ 2,
+ 317.6563,
+ 536.4973,
+ 2,
+ 315.5024,
+ 536.8374,
+ 2,
+ 295.7777,
+ 539.4827,
+ 2,
+ 290.2372,
+ 538.9287,
+ 2,
+ 260.5583,
+ 539.1473,
+ 2,
+ 252.989,
+ 559.7042,
+ 2,
+ 222.0985,
+ 494.5581,
+ 2,
+ 204.3461,
+ 496.7641,
+ 2,
+ 229.7767,
+ 555.3691,
+ 2,
+ 203.9402,
+ 564.1676,
+ 2,
+ 254.6329,
+ 440.3163,
+ 2,
+ 252.7878,
+ 421.1483,
+ 2,
+ 351.9561,
+ 400.9315,
+ 2,
+ 368.0247,
+ 412.8534,
+ 2,
+ 347.6211,
+ 500.3006,
+ 2,
+ 367.0544,
+ 542.1705,
+ 2
+ ],
+ "keypoints_21": [
+ 319.2161,
+ 546.3765,
+ 2,
+ 317.6563,
+ 536.4973,
+ 2,
+ 315.5024,
+ 536.8374,
+ 2,
+ 295.7777,
+ 539.4827,
+ 2,
+ 290.2372,
+ 538.9287,
+ 2,
+ 260.5583,
+ 539.1473,
+ 2,
+ 252.989,
+ 559.7042,
+ 2,
+ 222.0985,
+ 494.5581,
+ 2,
+ 204.3461,
+ 496.7641,
+ 2,
+ 229.7767,
+ 555.3691,
+ 2,
+ 203.9402,
+ 564.1676,
+ 2,
+ 254.6329,
+ 440.3163,
+ 2,
+ 252.7878,
+ 421.1483,
+ 2,
+ 351.9561,
+ 400.9315,
+ 2,
+ 368.0247,
+ 412.8534,
+ 2,
+ 347.6211,
+ 500.3006,
+ 2,
+ 367.0544,
+ 542.1705,
+ 2,
+ 248.5114,
+ 559.976,
+ 2,
+ 253.5939,
+ 575.1541,
+ 2,
+ 357.1097,
+ 548.0375,
+ 2,
+ 379.7624,
+ 573.8666,
+ 2
+ ],
+ "self_contact": [
+ [
+ 245.1376,
+ 570.4875
+ ]
+ ],
+ "num_keypoints": 17,
+ "num_keypoints_21": 21,
+ "iscrowd": 0,
+ "image_id": 15000000000590,
+ "area": 62008.05021846336,
+ "bbox": [
+ 168.77576,
+ 366.08698000000004,
+ 253.18396800000005,
+ 244.91301999999996
+ ],
+ "category_id": 1,
+ "id": 15000000092347,
+ "annotator": 66705
+ },
+ {
+ "keypoints": [
+ 233.1389,
+ 406.6037,
+ 2,
+ 243.5176,
+ 397.9166,
+ 2,
+ 243.0948,
+ 396.1787,
+ 2,
+ 235.8086,
+ 380.0257,
+ 2,
+ 233.4394,
+ 371.1951,
+ 2,
+ 200.7799,
+ 367.2566,
+ 2,
+ 222.3385,
+ 339.9251,
+ 2,
+ 218.5684,
+ 431.6162,
+ 2,
+ 216.3631,
+ 433.129,
+ 2,
+ 238.3363,
+ 495.4999,
+ 2,
+ 240.2118,
+ 500.6888,
+ 2,
+ 253.2291,
+ 222.9011,
+ 2,
+ 270.424,
+ 250.1,
+ 2,
+ 192.7242,
+ 138.9058,
+ 2,
+ 372.9364,
+ 324.4092,
+ 2,
+ 148.4319,
+ 79.9982,
+ 2,
+ 444.6949,
+ 407.9868,
+ 2
+ ],
+ "keypoints_21": [
+ 233.1389,
+ 406.6037,
+ 2,
+ 243.5176,
+ 397.9166,
+ 2,
+ 243.0948,
+ 396.1787,
+ 2,
+ 235.8086,
+ 380.0257,
+ 2,
+ 233.4394,
+ 371.1951,
+ 2,
+ 200.7799,
+ 367.2566,
+ 2,
+ 222.3385,
+ 339.9251,
+ 2,
+ 218.5684,
+ 431.6162,
+ 2,
+ 216.3631,
+ 433.129,
+ 2,
+ 238.3363,
+ 495.4999,
+ 2,
+ 240.2118,
+ 500.6888,
+ 2,
+ 253.2291,
+ 222.9011,
+ 2,
+ 270.424,
+ 250.1,
+ 2,
+ 192.7242,
+ 138.9058,
+ 2,
+ 372.9364,
+ 324.4092,
+ 2,
+ 148.4319,
+ 79.9982,
+ 2,
+ 444.6949,
+ 407.9868,
+ 2,
+ 245.196,
+ 517.5082,
+ 2,
+ 238.3205,
+ 541.3807,
+ 2,
+ 113.9739,
+ 40.4267,
+ 2,
+ 501.7295,
+ 448.3217,
+ 2
+ ],
+ "self_contact": [],
+ "num_keypoints": 17,
+ "num_keypoints_21": 21,
+ "iscrowd": 0,
+ "image_id": 15000000000590,
+ "area": 337013.68142,
+ "bbox": [
+ 36.42278,
+ 0,
+ 551.57722,
+ 611
+ ],
+ "category_id": 1,
+ "id": 15000000092348,
+ "annotator": 66705
+ }
+ ],
+ "categories": [
+ {
+ "supercategory": "person",
+ "id": 1,
+ "name": "person",
+ "keypoints": [
+ "nose",
+ "left_eye",
+ "right_eye",
+ "left_ear",
+ "right_ear",
+ "left_shoulder",
+ "right_shoulder",
+ "left_elbow",
+ "right_elbow",
+ "left_wrist",
+ "right_wrist",
+ "left_hip",
+ "right_hip",
+ "left_knee",
+ "right_knee",
+ "left_ankle",
+ "right_ankle",
+ "left_finger",
+ "right_finger",
+ "left_toe",
+ "right_toe"
+ ],
+ "skeleton": [
+ [
+ 20,
+ 16
+ ],
+ [
+ 16,
+ 14
+ ],
+ [
+ 14,
+ 12
+ ],
+ [
+ 21,
+ 17
+ ],
+ [
+ 17,
+ 15
+ ],
+ [
+ 15,
+ 13
+ ],
+ [
+ 12,
+ 13
+ ],
+ [
+ 6,
+ 12
+ ],
+ [
+ 7,
+ 13
+ ],
+ [
+ 6,
+ 7
+ ],
+ [
+ 6,
+ 8
+ ],
+ [
+ 7,
+ 9
+ ],
+ [
+ 10,
+ 18
+ ],
+ [
+ 8,
+ 10
+ ],
+ [
+ 11,
+ 19
+ ],
+ [
+ 9,
+ 11
+ ],
+ [
+ 2,
+ 3
+ ],
+ [
+ 1,
+ 2
+ ],
+ [
+ 1,
+ 3
+ ],
+ [
+ 2,
+ 4
+ ],
+ [
+ 3,
+ 5
+ ],
+ [
+ 4,
+ 6
+ ],
+ [
+ 5,
+ 7
+ ]
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/tests/data/humanart/test_humanart_det_AP_H_56.json b/tests/data/humanart/test_humanart_det_AP_H_56.json
new file mode 100644
index 0000000000..753caa0c07
--- /dev/null
+++ b/tests/data/humanart/test_humanart_det_AP_H_56.json
@@ -0,0 +1,145 @@
+[
+ {
+ "bbox": [
+ 411.55450439453125,
+ 773.5175170898438,
+ 925.8963623046875,
+ 1736.38623046875
+ ],
+ "category_id": 1,
+ "image_id": 2000000001648,
+ "score": 0.9018925428390503
+ },
+ {
+ "bbox": [
+ 23.97265625,
+ 19.622175216674805,
+ 1121.828369140625,
+ 1269.2109375
+ ],
+ "category_id": 1,
+ "image_id": 2000000001648,
+ "score": 0.4558742344379425
+ },
+ {
+ "bbox": [
+ 82.678466796875,
+ 475.8934020996094,
+ 1093.4742431640625,
+ 1717.331298828125
+ ],
+ "category_id": 1,
+ "image_id": 2000000001648,
+ "score": 0.37606894969940186
+ },
+ {
+ "bbox": [
+ 393.59222412109375,
+ 125.75264739990234,
+ 895.0135498046875,
+ 1201.154296875
+ ],
+ "category_id": 1,
+ "image_id": 2000000001648,
+ "score": 0.08204865455627441
+ },
+ {
+ "bbox": [
+ 75.03559875488281,
+ 52.54023742675781,
+ 759.2489624023438,
+ 974.7556762695312
+ ],
+ "category_id": 1,
+ "image_id": 2000000001648,
+ "score": 0.07333727180957794
+ },
+ {
+ "bbox": [
+ 197.08047485351562,
+ 139.95877075195312,
+ 402.2601318359375,
+ 591.4268188476562
+ ],
+ "category_id": 1,
+ "image_id": 12000000005603,
+ "score": 0.9604519009590149
+ },
+ {
+ "bbox": [
+ 67.07928466796875,
+ 132.88070678710938,
+ 535.9130249023438,
+ 600.0
+ ],
+ "category_id": 1,
+ "image_id": 12000000005603,
+ "score": 0.10827567428350449
+ },
+ {
+ "bbox": [
+ 21.64974594116211,
+ 0.0,
+ 564.9321899414062,
+ 592.8584594726562
+ ],
+ "category_id": 1,
+ "image_id": 15000000000590,
+ "score": 0.9986042380332947
+ },
+ {
+ "bbox": [
+ 158.69786071777344,
+ 249.30482482910156,
+ 410.9751281738281,
+ 608.938720703125
+ ],
+ "category_id": 1,
+ "image_id": 15000000000590,
+ "score": 0.7594972252845764
+ },
+ {
+ "bbox": [
+ 184.25045776367188,
+ 370.5571594238281,
+ 361.1768493652344,
+ 601.1585083007812
+ ],
+ "category_id": 1,
+ "image_id": 15000000000590,
+ "score": 0.26641231775283813
+ },
+ {
+ "bbox": [
+ 129.24253845214844,
+ 251.26560974121094,
+ 552.2449951171875,
+ 517.3319702148438
+ ],
+ "category_id": 1,
+ "image_id": 15000000000590,
+ "score": 0.05408962443470955
+ },
+ {
+ "bbox": [
+ 168.77576,
+ 366.08698000000004,
+ 421.95972800000004,
+ 611.0
+ ],
+ "category_id": 1,
+ "image_id": 15000000000590,
+ "score": 0.6465661513194214
+ },
+ {
+ "bbox": [
+ 36.42278,
+ 0.0,
+ 588.0,
+ 611.0
+ ],
+ "category_id": 1,
+ "image_id": 15000000000590,
+ "score": 0.844070429325392
+ }
+]
\ No newline at end of file
diff --git a/tests/test_datasets/test_datasets/test_body_datasets/test_humanart_dataset.py b/tests/test_datasets/test_datasets/test_body_datasets/test_humanart_dataset.py
new file mode 100644
index 0000000000..dcf29ab692
--- /dev/null
+++ b/tests/test_datasets/test_datasets/test_body_datasets/test_humanart_dataset.py
@@ -0,0 +1,160 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from unittest import TestCase
+
+import numpy as np
+
+from mmpose.datasets.datasets.body import HumanArtDataset
+
+
+class TestHumanArtDataset(TestCase):
+
+ def build_humanart_dataset(self, **kwargs):
+
+ cfg = dict(
+ ann_file='test_humanart.json',
+ bbox_file=None,
+ data_mode='topdown',
+ data_root='tests/data/humanart',
+ pipeline=[],
+ test_mode=False)
+
+ cfg.update(kwargs)
+ return HumanArtDataset(**cfg)
+
+ def check_data_info_keys(self,
+ data_info: dict,
+ data_mode: str = 'topdown'):
+ if data_mode == 'topdown':
+ expected_keys = dict(
+ img_id=int,
+ img_path=str,
+ bbox=np.ndarray,
+ bbox_score=np.ndarray,
+ keypoints=np.ndarray,
+ keypoints_visible=np.ndarray,
+ id=int)
+ elif data_mode == 'bottomup':
+ expected_keys = dict(
+ img_id=int,
+ img_path=str,
+ bbox=np.ndarray,
+ bbox_score=np.ndarray,
+ keypoints=np.ndarray,
+ keypoints_visible=np.ndarray,
+ invalid_segs=list,
+ id=list)
+ else:
+ raise ValueError(f'Invalid data_mode {data_mode}')
+
+ for key, type_ in expected_keys.items():
+ self.assertIn(key, data_info)
+ self.assertIsInstance(data_info[key], type_, key)
+
+ def check_metainfo_keys(self, metainfo: dict):
+ expected_keys = dict(
+ dataset_name=str,
+ num_keypoints=int,
+ keypoint_id2name=dict,
+ keypoint_name2id=dict,
+ upper_body_ids=list,
+ lower_body_ids=list,
+ flip_indices=list,
+ flip_pairs=list,
+ keypoint_colors=np.ndarray,
+ num_skeleton_links=int,
+ skeleton_links=list,
+ skeleton_link_colors=np.ndarray,
+ dataset_keypoint_weights=np.ndarray)
+
+ for key, type_ in expected_keys.items():
+ self.assertIn(key, metainfo)
+ self.assertIsInstance(metainfo[key], type_, key)
+
+ def test_metainfo(self):
+ dataset = self.build_humanart_dataset()
+ self.check_metainfo_keys(dataset.metainfo)
+ # test dataset_name
+ self.assertEqual(dataset.metainfo['dataset_name'], 'Human-Art')
+
+ # test number of keypoints
+ num_keypoints = 17
+ self.assertEqual(dataset.metainfo['num_keypoints'], num_keypoints)
+ self.assertEqual(
+ len(dataset.metainfo['keypoint_colors']), num_keypoints)
+ self.assertEqual(
+ len(dataset.metainfo['dataset_keypoint_weights']), num_keypoints)
+ # note that len(sigmas) may be zero if dataset.metainfo['sigmas'] = []
+ self.assertEqual(len(dataset.metainfo['sigmas']), num_keypoints)
+
+ # test some extra metainfo
+ self.assertEqual(
+ len(dataset.metainfo['skeleton_links']),
+ len(dataset.metainfo['skeleton_link_colors']))
+
+ def test_topdown(self):
+ # test topdown training
+ dataset = self.build_humanart_dataset(data_mode='topdown')
+ self.assertEqual(len(dataset), 4)
+ self.check_data_info_keys(dataset[0], data_mode='topdown')
+
+ # test topdown testing
+ dataset = self.build_humanart_dataset(
+ data_mode='topdown', test_mode=True)
+ self.assertEqual(len(dataset), 4)
+ self.check_data_info_keys(dataset[0], data_mode='topdown')
+
+ # test topdown testing with bbox file
+ dataset = self.build_humanart_dataset(
+ data_mode='topdown',
+ test_mode=True,
+ bbox_file='tests/data/humanart/test_humanart_det_AP_H_56.json')
+ self.assertEqual(len(dataset), 13)
+ self.check_data_info_keys(dataset[0], data_mode='topdown')
+
+ # test topdown testing with filter config
+ dataset = self.build_humanart_dataset(
+ data_mode='topdown',
+ test_mode=True,
+ bbox_file='tests/data/humanart/test_humanart_det_AP_H_56.json',
+ filter_cfg=dict(bbox_score_thr=0.3))
+ self.assertEqual(len(dataset), 8)
+
+ def test_bottomup(self):
+ # test bottomup training
+ dataset = self.build_humanart_dataset(data_mode='bottomup')
+ self.assertEqual(len(dataset), 3)
+ self.check_data_info_keys(dataset[0], data_mode='bottomup')
+
+ # test bottomup testing
+ dataset = self.build_humanart_dataset(
+ data_mode='bottomup', test_mode=True)
+ self.assertEqual(len(dataset), 3)
+ self.check_data_info_keys(dataset[0], data_mode='bottomup')
+
+ def test_exceptions_and_warnings(self):
+
+ with self.assertRaisesRegex(ValueError, 'got invalid data_mode'):
+ _ = self.build_humanart_dataset(data_mode='invalid')
+
+ with self.assertRaisesRegex(
+ ValueError,
+ '"bbox_file" is only supported when `test_mode==True`'):
+ _ = self.build_humanart_dataset(
+ data_mode='topdown',
+ test_mode=False,
+ bbox_file='tests/data/humanart/test_humanart_det_AP_H_56.json')
+
+ with self.assertRaisesRegex(
+ ValueError, '"bbox_file" is only supported in topdown mode'):
+ _ = self.build_humanart_dataset(
+ data_mode='bottomup',
+ test_mode=True,
+ bbox_file='tests/data/humanart/test_humanart_det_AP_H_56.json')
+
+ with self.assertRaisesRegex(
+ ValueError,
+ '"bbox_score_thr" is only supported in topdown mode'):
+ _ = self.build_humanart_dataset(
+ data_mode='bottomup',
+ test_mode=True,
+ filter_cfg=dict(bbox_score_thr=0.3))
diff --git a/tools/dist_train.sh b/tools/dist_train.sh
old mode 100644
new mode 100755