% RTMPose arXiv preprint (arXiv:2303.07399). The key is kept as exported so existing \cite commands still resolve.
@misc{https://doi.org/10.48550/arxiv.2303.07399,
  author        = {Jiang, Tao and Lu, Peng and Zhang, Li and Ma, Ningsheng and Han, Rui and Lyu, Chengqi and Li, Yining and Chen, Kai},
  title         = {{RTMPose}: Real-Time Multi-Person Pose Estimation based on {MMPose}},
  publisher     = {arXiv},
  year          = {2023},
  eprint        = {2303.07399},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  doi           = {10.48550/ARXIV.2303.07399},
  url           = {https://arxiv.org/abs/2303.07399},
  keywords      = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences},
  copyright     = {Creative Commons Attribution 4.0 International},
}
% RTMDet arXiv preprint (arXiv:2212.07784).
@misc{lyu2022rtmdet,
  author        = {Lyu, Chengqi and Zhang, Wenwei and Huang, Haian and Zhou, Yue and Wang, Yudong and Liu, Yanyi and Zhang, Shilong and Chen, Kai},
  title         = {{RTMDet}: An Empirical Study of Designing Real-Time Object Detectors},
  year          = {2022},
  eprint        = {2212.07784},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
}
% Microsoft COCO dataset paper (European Conference on Computer Vision, 2014).
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title     = {{Microsoft} {COCO}: Common Objects in Context},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer},
}
- `*` denotes a model trained on 7 public datasets.
- `Body8` denotes the addition of the OCHuman dataset, in addition to the 7 datasets mentioned above, for evaluation.

Config | Input Size | AP (COCO) | PCK@0.1 (Body8) | AUC (Body8) | EPE (Body8) | Params(M) | FLOPS(G) | Download |
---|---|---|---|---|---|---|---|---|
RTMPose-t* | 256x192 | 65.9 | 91.44 | 63.18 | 19.45 | 3.34 | 0.36 | Model |
RTMPose-s* | 256x192 | 69.7 | 92.45 | 65.15 | 17.85 | 5.47 | 0.68 | Model |
RTMPose-m* | 256x192 | 74.9 | 94.25 | 68.59 | 15.12 | 13.59 | 1.93 | Model |
RTMPose-l* | 256x192 | 76.7 | 95.08 | 70.14 | 13.79 | 27.66 | 4.16 | Model |
RTMPose-m* | 384x288 | 76.6 | 94.64 | 70.38 | 13.98 | 13.72 | 4.33 | Model |
RTMPose-l* | 384x288 | 78.3 | 95.36 | 71.58 | 13.08 | 27.79 | 9.35 | Model |