# cascade_rcnn_x101_64x4d_fpn_coco.py
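# Cascade R-CNN with a ResNeXt-101 (64x4d) backbone and FPN neck, trained and
# evaluated on COCO object detection (80 classes, bbox only).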

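# runtime settings: checkpoint every epoch, log every 50 iterations, single
# ('train', 1) workflow.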
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[16, 19])
total_epochs = 20
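# with step=[16, 19] and total_epochs=20 the learning rate is decayed after
# epochs 16 and 19 (MMDetection's '20e' schedule).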
# model settings
model = dict(
    type='CascadeRCNN',
    pretrained='open-mmlab://resnext101_64x4d',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=64,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
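    # FPN produces 5 pyramid levels of 256 channels (strides 4-64, matching
    # the RPN anchor strides below) from the four backbone stages.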
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
    roi_head=dict(
        type='CascadeRoIHead',
        num_stages=3,
        stage_loss_weights=[1, 0.5, 0.25],
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
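        # three cascade stages with identical head architecture; regression
        # target stds tighten per stage (0.1 -> 0.05 -> 0.033) as the boxes
        # are progressively refined.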
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=80,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=80,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=80,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]),
    # model training and testing settings
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=0,
            pos_weight=-1,
            debug=False),
        rpn_proposal=dict(
            nms_pre=2000,
            max_per_img=2000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
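        # per-stage R-CNN assigners: the positive IoU threshold rises
        # 0.5 -> 0.6 -> 0.7 to match the three cascade heads above.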
        rcnn=[
            dict(
                assigner=dict(
                    type='MaxIoUAssigner',
                    pos_iou_thr=0.5,
                    neg_iou_thr=0.5,
                    min_pos_iou=0.5,
                    match_low_quality=False,
                    ignore_iof_thr=-1),
                sampler=dict(
                    type='RandomSampler',
                    num=512,
                    pos_fraction=0.25,
                    neg_pos_ub=-1,
                    add_gt_as_proposals=True),
                pos_weight=-1,
                debug=False),
            dict(
                assigner=dict(
                    type='MaxIoUAssigner',
                    pos_iou_thr=0.6,
                    neg_iou_thr=0.6,
                    min_pos_iou=0.6,
                    match_low_quality=False,
                    ignore_iof_thr=-1),
                sampler=dict(
                    type='RandomSampler',
                    num=512,
                    pos_fraction=0.25,
                    neg_pos_ub=-1,
                    add_gt_as_proposals=True),
                pos_weight=-1,
                debug=False),
            dict(
                assigner=dict(
                    type='MaxIoUAssigner',
                    pos_iou_thr=0.7,
                    neg_iou_thr=0.7,
                    min_pos_iou=0.7,
                    match_low_quality=False,
                    ignore_iof_thr=-1),
                sampler=dict(
                    type='RandomSampler',
                    num=512,
                    pos_fraction=0.25,
                    neg_pos_ub=-1,
                    add_gt_as_proposals=True),
                pos_weight=-1,
                debug=False)
        ]),
    test_cfg=dict(
        rpn=dict(
            nms_pre=1000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100)))
dataset_type = 'CocoDataset'
data_root = 'data/coco'
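# standard ImageNet mean/std in RGB order; to_rgb=True converts the
# BGR-loaded image to RGB before normalization.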
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
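# training pipeline: resize to (1333, 800) keeping the aspect ratio, random
# horizontal flip with probability 0.5, normalize, and pad to a multiple of 32.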
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
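# test pipeline: single scale (1333, 800), no flip augmentation.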
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img']),
        ])
]
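# data loaders: 2 images and 2 workers per GPU; train on COCO train2017,
# validate and test on val2017.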
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=f'{data_root}/annotations/instances_train2017.json',
        img_prefix=f'{data_root}/train2017/',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=f'{data_root}/annotations/instances_val2017.json',
        img_prefix=f'{data_root}/val2017/',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=f'{data_root}/annotations/instances_val2017.json',
        img_prefix=f'{data_root}/val2017/',
        pipeline=test_pipeline))
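# evaluate COCO bbox mAP on the validation set after every epoch.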
evaluation = dict(interval=1, metric='bbox')