# mask_rcnn_r50_fpn_2x_coco.py
model = dict(
    type='MaskRCNN',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[0.0, 0.0, 0.0, 0.0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
    roi_head=dict(
        type='StandardRoIHead',
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        bbox_head=dict(
            type='Shared2FCBBoxHead',
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=80,
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0.0, 0.0, 0.0, 0.0],
                target_stds=[0.1, 0.1, 0.2, 0.2]),
            reg_class_agnostic=False,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
            loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
        mask_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        mask_head=dict(
            type='FCNMaskHead',
            num_convs=4,
            in_channels=256,
            conv_out_channels=256,
            num_classes=80,
            loss_mask=dict(
                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=-1,
            pos_weight=-1,
            debug=False),
        rpn_proposal=dict(
            nms_pre=2000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            mask_size=28,
            pos_weight=-1,
            debug=False)),
    test_cfg=dict(
        rpn=dict(
            nms_pre=1000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100,
            mask_thr_binary=0.5)))
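
# A minimal sketch (not part of the config) of how the model section above is
# typically consumed with the mmcv/mmdet 2.x APIs. The config path below is an
# assumption about where this file lives in an MMDetection checkout.
#
#   from mmcv import Config
#   from mmdet.models import build_detector
#
#   cfg = Config.fromfile('configs/mask_rcnn/mask_rcnn_r50_fpn_2x_coco.py')
#   model = build_detector(
#       cfg.model,
#       train_cfg=cfg.get('train_cfg'),
#       test_cfg=cfg.get('test_cfg'))
#   model.init_weights()  # triggers init_cfg, incl. the torchvision://resnet50 backbone weights
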
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(
        type='Normalize',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
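
# A minimal sketch (assumed usage, not part of the config): in mmdet 2.x a
# pipeline list like the ones above is wrapped into a single callable that the
# dataset applies to each sample.
#
#   from mmdet.datasets.pipelines import Compose
#
#   train_transforms = Compose(train_pipeline)
#   # The dataset calls this on a dict holding 'img_info', 'ann_info' and
#   # 'img_prefix' for one image; the output dict carries the 'img',
#   # 'gt_bboxes', 'gt_labels' and 'gt_masks' keys kept by the Collect step.
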
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type='CocoDataset',
        ann_file='data/coco/annotations/instances_train2017.json',
        img_prefix='data/coco/train2017/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
            dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
            dict(type='RandomFlip', flip_ratio=0.5),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            dict(type='Pad', size_divisor=32),
            dict(type='DefaultFormatBundle'),
            dict(
                type='Collect',
                keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'])
        ]),
    val=dict(
        type='CocoDataset',
        ann_file='data/coco/annotations/instances_val2017.json',
        img_prefix='data/coco/val2017/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(1333, 800),
                flip=False,
                transforms=[
                    dict(type='Resize', keep_ratio=True),
                    dict(type='RandomFlip'),
                    dict(
                        type='Normalize',
                        mean=[123.675, 116.28, 103.53],
                        std=[58.395, 57.12, 57.375],
                        to_rgb=True),
                    dict(type='Pad', size_divisor=32),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]),
    test=dict(
        type='CocoDataset',
        ann_file='data/coco/annotations/instances_val2017.json',
        img_prefix='data/coco/val2017/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(1333, 800),
                flip=False,
                transforms=[
                    dict(type='Resize', keep_ratio=True),
                    dict(type='RandomFlip'),
                    dict(
                        type='Normalize',
                        mean=[123.675, 116.28, 103.53],
                        std=[58.395, 57.12, 57.375],
                        to_rgb=True),
                    dict(type='Pad', size_divisor=32),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]))
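
# A minimal sketch (assumed usage) of building the training dataset and
# dataloader from the `data` dict, continuing from the `cfg` loaded in the
# sketch after the model section. With the default 8-GPU setup,
# samples_per_gpu=2 gives the effective batch size of 16 that the lr=0.02
# schedule below assumes.
#
#   from mmdet.datasets import build_dataset, build_dataloader
#
#   dataset = build_dataset(cfg.data.train)
#   data_loader = build_dataloader(
#       dataset,
#       samples_per_gpu=cfg.data.samples_per_gpu,
#       workers_per_gpu=cfg.data.workers_per_gpu,
#       dist=False,
#       shuffle=True)
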
evaluation = dict(metric=['bbox', 'segm'])
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[16, 22])
runner = dict(type='EpochBasedRunner', max_epochs=24)
checkpoint_config = dict(interval=1)
log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
custom_hooks = [dict(type='NumClassCheckHook')]
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
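
# Typical ways to launch training with this config (assumed repo-relative
# paths; adjust to the local checkout):
#
#   # single GPU
#   python tools/train.py configs/mask_rcnn/mask_rcnn_r50_fpn_2x_coco.py
#
#   # 8 GPUs; lr=0.02 follows the linear scaling rule for
#   # 8 GPUs x samples_per_gpu=2 (batch size 16), so scale the learning rate
#   # proportionally when using a different total batch size.
#   bash tools/dist_train.sh configs/mask_rcnn/mask_rcnn_r50_fpn_2x_coco.py 8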