% ResNeSt paper (arXiv preprint). The arXiv identifier is kept in `journal`
% for classic BibTeX styles and also given in the eprint fields.
@article{zhang2020resnest,
  title         = {{ResNeSt}: Split-Attention Networks},
  author        = {Zhang, Hang and Wu, Chongruo and Zhang, Zhongyue and Zhu, Yi and Zhang, Zhi and Lin, Haibin and Sun, Yue and He, Tong and Muller, Jonas and Manmatha, R. and Li, Mu and Smola, Alexander},
  journal       = {arXiv preprint arXiv:2004.08955},
  year          = {2020},
  eprint        = {2004.08955},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CV}
}
% Microsoft COCO dataset paper (ECCV 2014).
@inproceedings{lin2014microsoft,
  title     = {{Microsoft} {COCO}: Common Objects in Context},
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  booktitle = {European Conference on Computer Vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer}
}
Results on COCO val2017, using a detector that achieves a human AP of 56.4 on the COCO val2017 dataset:
Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
---|---|---|---|---|---|---|---|---|
pose_resnest_50 | 256x192 | 0.720 | 0.899 | 0.800 | 0.775 | 0.939 | ckpt | log |
pose_resnest_50 | 384x288 | 0.737 | 0.900 | 0.811 | 0.789 | 0.937 | ckpt | log |
pose_resnest_101 | 256x192 | 0.725 | 0.900 | 0.807 | 0.781 | 0.939 | ckpt | log |
pose_resnest_101 | 384x288 | 0.745 | 0.905 | 0.818 | 0.798 | 0.942 | ckpt | log |
pose_resnest_200 | 256x192 | 0.731 | 0.905 | 0.812 | 0.787 | 0.943 | ckpt | log |
pose_resnest_200 | 384x288 | 0.753 | 0.907 | 0.827 | 0.805 | 0.943 | ckpt | log |
pose_resnest_269 | 256x192 | 0.737 | 0.907 | 0.819 | 0.792 | 0.943 | ckpt | log |
pose_resnest_269 | 384x288 | 0.754 | 0.908 | 0.828 | 0.805 | 0.943 | ckpt | log |