rtmpose-m_8xb256-210e_body8-256x192.py

_base_ = ['../../../_base_/default_runtime.py']
# runtime
max_epochs = 210
stage2_num_epochs = 30
base_lr = 4e-3
train_cfg = dict(max_epochs=max_epochs, val_interval=10)
randomness = dict(seed=21)
# optimizer
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
    paramwise_cfg=dict(
        norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
# learning rate
param_scheduler = [
    dict(
        type='LinearLR',
        start_factor=1.0e-5,
        by_epoch=False,
        begin=0,
        end=1000),
    dict(
        # use cosine lr from epoch 105 to 210
        type='CosineAnnealingLR',
        eta_min=base_lr * 0.05,
        begin=max_epochs // 2,
        end=max_epochs,
        T_max=max_epochs // 2,
        by_epoch=True,
        convert_to_iter_based=True),
]
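# Derived schedule, for reference: LinearLR warms the lr up over the first
# 1000 iterations; the lr then holds at base_lr = 4e-3 until epoch 105
# (max_epochs // 2), where CosineAnnealingLR takes over and decays it to
# eta_min = 4e-3 * 0.05 = 2e-4 by epoch 210.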
# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=1024)
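# Note: this only takes effect when auto LR scaling is enabled (e.g. via the
# --auto-scale-lr flag of the standard training script); the runner then
# multiplies base_lr by actual_total_batch_size / base_batch_size. For the
# 8xb256 setting in the file name that is 8 * 256 / 1024 = 2x.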
# codec settings
codec = dict(
    type='SimCCLabel',
    input_size=(192, 256),
    sigma=(4.9, 5.66),
    simcc_split_ratio=2.0,
    normalize=False,
    use_dark=False)
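# SimCC treats each keypoint coordinate as a 1-D classification problem:
# with input_size=(192, 256) in (w, h) order and simcc_split_ratio=2.0, the
# head predicts 192 * 2 = 384 bins along x and 256 * 2 = 512 bins along y,
# and sigma sets the Gaussian smoothing of those 1-D labels. A minimal
# decoding sketch (illustrative only; `simcc_decode` is not an mmpose API):
#
#   import numpy as np
#
#   def simcc_decode(simcc_x, simcc_y, split_ratio=2.0):
#       """simcc_x: (K, Wx), simcc_y: (K, Wy) -> (K, 2) pixel coords."""
#       x = np.argmax(simcc_x, axis=1) / split_ratio
#       y = np.argmax(simcc_y, axis=1) / split_ratio
#       return np.stack([x, y], axis=1)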
# model settings
model = dict(
    type='TopdownPoseEstimator',
    data_preprocessor=dict(
        type='PoseDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True),
    backbone=dict(
        _scope_='mmdet',
        type='CSPNeXt',
        arch='P5',
        expand_ratio=0.5,
        deepen_factor=0.67,
        widen_factor=0.75,
        out_indices=(4, ),
        channel_attention=True,
        norm_cfg=dict(type='SyncBN'),
        act_cfg=dict(type='SiLU'),
        init_cfg=dict(
            type='Pretrained',
            prefix='backbone.',
            checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
            'rtmposev1/cspnext-m_udp-body7_210e-256x192-e0c9327b_20230504.pth'  # noqa
        )),
    head=dict(
        type='RTMCCHead',
        in_channels=768,
        out_channels=17,
        input_size=codec['input_size'],
        in_featuremap_size=(6, 8),
        simcc_split_ratio=codec['simcc_split_ratio'],
        final_layer_kernel_size=7,
        gau_cfg=dict(
            hidden_dims=256,
            s=128,
            expansion_factor=2,
            dropout_rate=0.0,
            drop_path=0.0,
            act_fn='SiLU',
            use_rel_bias=False,
            pos_enc=False),
        loss=dict(
            type='KLDiscretLoss',
            use_target_weight=True,
            beta=10.,
            label_softmax=True),
        decoder=codec),
    test_cfg=dict(flip_test=True, ))
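# Shape bookkeeping (derived, assuming CSPNeXt's usual 32x total stride at
# out_indices=(4, )): a 256x192 (h x w) crop yields an 8x6 feature map,
# matching in_featuremap_size=(6, 8) in (w, h) order, and the P5 stage width
# 1024 scaled by widen_factor=0.75 gives in_channels=768.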
# base dataset settings
dataset_type = 'CocoDataset'
data_mode = 'topdown'
data_root = 'data/'
backend_args = dict(backend='local')
# pipelines
train_pipeline = [
    dict(type='LoadImage', backend_args=backend_args),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(
        type='RandomBBoxTransform', scale_factor=[0.5, 1.5], rotate_factor=90),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='mmdet.YOLOXHSVRandomAug'),
    dict(type='PhotometricDistortion'),
    dict(
        type='Albumentation',
        transforms=[
            dict(type='Blur', p=0.1),
            dict(type='MedianBlur', p=0.1),
            dict(
                type='CoarseDropout',
                max_holes=1,
                max_height=0.4,
                max_width=0.4,
                min_holes=1,
                min_height=0.2,
                min_width=0.2,
                p=1.0),
        ]),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs')
]
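# Note on CoarseDropout above: per the Albumentations convention, float
# values for min/max height and width are interpreted as fractions of the
# image size, so each crop gets one occluding patch covering roughly 20-40%
# of each side.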
val_pipeline = [
    dict(type='LoadImage', backend_args=backend_args),
    dict(type='GetBBoxCenterScale'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='PackPoseInputs')
]
train_pipeline_stage2 = [
    dict(type='LoadImage', backend_args=backend_args),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(
        type='RandomBBoxTransform',
        shift_factor=0.,
        scale_factor=[0.5, 1.5],
        rotate_factor=90),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='mmdet.YOLOXHSVRandomAug'),
    dict(
        type='Albumentation',
        transforms=[
            dict(type='Blur', p=0.1),
            dict(type='MedianBlur', p=0.1),
            dict(
                type='CoarseDropout',
                max_holes=1,
                max_height=0.4,
                max_width=0.4,
                min_holes=1,
                min_height=0.2,
                min_width=0.2,
                p=0.5),
        ]),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs')
]
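# Stage-2 differences vs. train_pipeline: RandomBBoxTransform pins
# shift_factor to 0., PhotometricDistortion is dropped, and CoarseDropout
# fires with p=0.5 instead of p=1.0, i.e. milder augmentation for the final
# stage2_num_epochs epochs.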
# mapping
aic_coco = [
    (0, 6),
    (1, 8),
    (2, 10),
    (3, 5),
    (4, 7),
    (5, 9),
    (6, 12),
    (7, 14),
    (8, 16),
    (9, 11),
    (10, 13),
    (11, 15),
]
crowdpose_coco = [
    (0, 5),
    (1, 6),
    (2, 7),
    (3, 8),
    (4, 9),
    (5, 10),
    (6, 11),
    (7, 12),
    (8, 13),
    (9, 14),
    (10, 15),
    (11, 16),
]
mpii_coco = [
    (0, 16),
    (1, 14),
    (2, 12),
    (3, 11),
    (4, 13),
    (5, 15),
    (10, 10),
    (11, 8),
    (12, 6),
    (13, 5),
    (14, 7),
    (15, 9),
]
jhmdb_coco = [
    (3, 6),
    (4, 5),
    (5, 12),
    (6, 11),
    (7, 8),
    (8, 7),
    (9, 14),
    (10, 13),
    (11, 10),
    (12, 9),
    (13, 16),
    (14, 15),
]
halpe_coco = [
    (0, 0),
    (1, 1),
    (2, 2),
    (3, 3),
    (4, 4),
    (5, 5),
    (6, 6),
    (7, 7),
    (8, 8),
    (9, 9),
    (10, 10),
    (11, 11),
    (12, 12),
    (13, 13),
    (14, 14),
    (15, 15),
    (16, 16),
]
ochuman_coco = [
    (0, 0),
    (1, 1),
    (2, 2),
    (3, 3),
    (4, 4),
    (5, 5),
    (6, 6),
    (7, 7),
    (8, 8),
    (9, 9),
    (10, 10),
    (11, 11),
    (12, 12),
    (13, 13),
    (14, 14),
    (15, 15),
    (16, 16),
]
posetrack_coco = [
    (0, 0),
    (3, 3),
    (4, 4),
    (5, 5),
    (6, 6),
    (7, 7),
    (8, 8),
    (9, 9),
    (10, 10),
    (11, 11),
    (12, 12),
    (13, 13),
    (14, 14),
    (15, 15),
    (16, 16),
]
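# Each mapping above is a list of (source_index, target_index) pairs for
# KeypointConverter: source-dataset keypoints are scattered into a
# COCO-ordered 17-keypoint array and unmapped target slots are left empty
# (invisible). A minimal sketch of the remapping (illustrative only, not the
# mmpose implementation):
#
#   import numpy as np
#
#   def remap(kpts, visible, mapping, num_keypoints=17):
#       """kpts: (N, K_src, 2), visible: (N, K_src) -> COCO-ordered arrays."""
#       src, tgt = map(list, zip(*mapping))
#       out_kpts = np.zeros((kpts.shape[0], num_keypoints, 2), kpts.dtype)
#       out_vis = np.zeros((kpts.shape[0], num_keypoints), visible.dtype)
#       out_kpts[:, tgt] = kpts[:, src]
#       out_vis[:, tgt] = visible[:, src]
#       return out_kpts, out_vis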
# train datasets
dataset_coco = dict(
    type=dataset_type,
    data_root=data_root,
    data_mode=data_mode,
    ann_file='coco/annotations/person_keypoints_train2017.json',
    data_prefix=dict(img='detection/coco/train2017/'),
    pipeline=[],
)
dataset_aic = dict(
    type='AicDataset',
    data_root=data_root,
    data_mode=data_mode,
    ann_file='aic/annotations/aic_train.json',
    data_prefix=dict(img='pose/ai_challenge/ai_challenger_keypoint'
                     '_train_20170902/keypoint_train_images_20170902/'),
    pipeline=[
        dict(type='KeypointConverter', num_keypoints=17, mapping=aic_coco)
    ],
)
dataset_crowdpose = dict(
    type='CrowdPoseDataset',
    data_root=data_root,
    data_mode=data_mode,
    ann_file='crowdpose/annotations/mmpose_crowdpose_trainval.json',
    data_prefix=dict(img='pose/CrowdPose/images/'),
    pipeline=[
        dict(
            type='KeypointConverter', num_keypoints=17, mapping=crowdpose_coco)
    ],
)
dataset_mpii = dict(
    type='MpiiDataset',
    data_root=data_root,
    data_mode=data_mode,
    ann_file='mpii/annotations/mpii_train.json',
    data_prefix=dict(img='pose/MPI/images/'),
    pipeline=[
        dict(type='KeypointConverter', num_keypoints=17, mapping=mpii_coco)
    ],
)
dataset_jhmdb = dict(
    type='JhmdbDataset',
    data_root=data_root,
    data_mode=data_mode,
    ann_file='jhmdb/annotations/Sub1_train.json',
    data_prefix=dict(img='pose/JHMDB/'),
    pipeline=[
        dict(type='KeypointConverter', num_keypoints=17, mapping=jhmdb_coco)
    ],
)
dataset_halpe = dict(
    type='HalpeDataset',
    data_root=data_root,
    data_mode=data_mode,
    ann_file='halpe/annotations/halpe_train_v1.json',
    data_prefix=dict(img='pose/Halpe/hico_20160224_det/images/train2015'),
    pipeline=[
        dict(type='KeypointConverter', num_keypoints=17, mapping=halpe_coco)
    ],
)
dataset_posetrack = dict(
    type='PoseTrack18Dataset',
    data_root=data_root,
    data_mode=data_mode,
    ann_file='posetrack18/annotations/posetrack18_train.json',
    data_prefix=dict(img='pose/PoseChallenge2018/'),
    pipeline=[
        dict(
            type='KeypointConverter', num_keypoints=17, mapping=posetrack_coco)
    ],
)
# data loaders
train_dataloader = dict(
    batch_size=256,
    num_workers=10,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type='CombinedDataset',
        metainfo=dict(from_file='configs/_base_/datasets/coco.py'),
        datasets=[
            dataset_coco,
            dataset_aic,
            dataset_crowdpose,
            dataset_mpii,
            dataset_jhmdb,
            dataset_halpe,
            dataset_posetrack,
        ],
        pipeline=train_pipeline,
        test_mode=False,
    ))
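# CombinedDataset applies each sub-dataset's own pipeline first (here the
# KeypointConverter remapping), then the shared train_pipeline, so every
# sample reaches the model in the unified COCO 17-keypoint format defined by
# the metainfo file. At 8 GPUs x batch_size=256 (the 8xb256 in the file
# name), one iteration sees 2048 samples.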
# val datasets
val_coco = dict(
    type=dataset_type,
    data_root=data_root,
    data_mode=data_mode,
    ann_file='coco/annotations/person_keypoints_val2017.json',
    data_prefix=dict(img='detection/coco/val2017/'),
    pipeline=[],
)
val_aic = dict(
    type='AicDataset',
    data_root=data_root,
    data_mode=data_mode,
    ann_file='aic/annotations/aic_val.json',
    data_prefix=dict(
        img='pose/ai_challenge/ai_challenger_keypoint'
        '_validation_20170911/keypoint_validation_images_20170911/'),
    pipeline=[
        dict(type='KeypointConverter', num_keypoints=17, mapping=aic_coco)
    ],
)
val_crowdpose = dict(
    type='CrowdPoseDataset',
    data_root=data_root,
    data_mode=data_mode,
    ann_file='crowdpose/annotations/mmpose_crowdpose_test.json',
    data_prefix=dict(img='pose/CrowdPose/images/'),
    pipeline=[
        dict(
            type='KeypointConverter', num_keypoints=17, mapping=crowdpose_coco)
    ],
)
val_mpii = dict(
    type='MpiiDataset',
    data_root=data_root,
    data_mode=data_mode,
    ann_file='mpii/annotations/mpii_val.json',
    data_prefix=dict(img='pose/MPI/images/'),
    pipeline=[
        dict(type='KeypointConverter', num_keypoints=17, mapping=mpii_coco)
    ],
)
val_jhmdb = dict(
    type='JhmdbDataset',
    data_root=data_root,
    data_mode=data_mode,
    ann_file='jhmdb/annotations/Sub1_test.json',
    data_prefix=dict(img='pose/JHMDB/'),
    pipeline=[
        dict(type='KeypointConverter', num_keypoints=17, mapping=jhmdb_coco)
    ],
)
val_halpe = dict(
    type='HalpeDataset',
    data_root=data_root,
    data_mode=data_mode,
    ann_file='halpe/annotations/halpe_val_v1.json',
    data_prefix=dict(img='detection/coco/val2017/'),
    pipeline=[
        dict(type='KeypointConverter', num_keypoints=17, mapping=halpe_coco)
    ],
)
val_ochuman = dict(
    type='OCHumanDataset',
    data_root=data_root,
    data_mode=data_mode,
    ann_file='ochuman/annotations/'
    'ochuman_coco_format_val_range_0.00_1.00.json',
    data_prefix=dict(img='pose/OCHuman/images/'),
    pipeline=[
        dict(type='KeypointConverter', num_keypoints=17, mapping=ochuman_coco)
    ],
)
val_posetrack = dict(
    type='PoseTrack18Dataset',
    data_root=data_root,
    data_mode=data_mode,
    ann_file='posetrack18/annotations/posetrack18_val.json',
    data_prefix=dict(img='pose/PoseChallenge2018/'),
    pipeline=[
        dict(
            type='KeypointConverter', num_keypoints=17, mapping=posetrack_coco)
    ],
)
val_dataloader = dict(
    batch_size=64,
    num_workers=10,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='coco/annotations/person_keypoints_val2017.json',
        bbox_file=f'{data_root}coco/person_detection_results/'
        'COCO_val2017_detections_AP_H_56_person.json',
        data_prefix=dict(img='detection/coco/val2017/'),
        test_mode=True,
        pipeline=val_pipeline,
    ))
test_dataloader = dict(
    batch_size=64,
    num_workers=10,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type='CombinedDataset',
        metainfo=dict(from_file='configs/_base_/datasets/coco.py'),
        datasets=[
            val_coco,
            val_aic,
            val_crowdpose,
            val_mpii,
            val_jhmdb,
            val_halpe,
            val_ochuman,
            val_posetrack,
        ],
        pipeline=val_pipeline,
        test_mode=True,
    ))
# hooks
default_hooks = dict(
    checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
# default_hooks = dict(
#     checkpoint=dict(save_best='AUC', rule='greater', max_keep_ckpts=1))
custom_hooks = [
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0002,
        update_buffers=True,
        priority=49),
    dict(
        type='mmdet.PipelineSwitchHook',
        switch_epoch=max_epochs - stage2_num_epochs,
        switch_pipeline=train_pipeline_stage2)
]
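# With max_epochs=210 and stage2_num_epochs=30, PipelineSwitchHook swaps in
# train_pipeline_stage2 at epoch 180. The EMA hook tracks an exponential
# moving average of the weights for evaluation; a sketch of the plain EMA
# update it builds on (ExpMomentumEMA additionally warms the momentum up over
# early iterations):
#
#   def ema_update(ema_param, param, momentum=0.0002):
#       return (1 - momentum) * ema_param + momentum * param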
# evaluators
val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + 'coco/annotations/person_keypoints_val2017.json')
test_evaluator = [
    dict(type='PCKAccuracy', thr=0.1),
    dict(type='AUC'),
    dict(type='EPE')
]
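# Validation scores COCO AP on val2017 using the detector boxes from
# bbox_file, while the test loader mixes eight val sets, so test_evaluator
# falls back to dataset-agnostic keypoint metrics (PCK@0.1, AUC, EPE) that
# apply across all of them.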