# cascade-mask-rcnn_r50_fpn.py (7.0 KB)

  1. # model settings
  2. model = dict(
  3. type='CascadeRCNN',
  4. data_preprocessor=dict(
  5. type='DetDataPreprocessor',
  6. mean=[123.675, 116.28, 103.53],
  7. std=[58.395, 57.12, 57.375],
  8. bgr_to_rgb=True,
  9. pad_mask=True,
  10. pad_size_divisor=32),
  11. backbone=dict(
  12. type='ResNet',
  13. depth=50,
  14. num_stages=4,
  15. out_indices=(0, 1, 2, 3),
  16. frozen_stages=1,
  17. norm_cfg=dict(type='BN', requires_grad=True),
  18. norm_eval=True,
  19. style='pytorch',
  20. init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
  21. neck=dict(
  22. type='FPN',
  23. in_channels=[256, 512, 1024, 2048],
  24. out_channels=256,
  25. num_outs=5),
  26. rpn_head=dict(
  27. type='RPNHead',
  28. in_channels=256,
  29. feat_channels=256,
  30. anchor_generator=dict(
  31. type='AnchorGenerator',
  32. scales=[8],
  33. ratios=[0.5, 1.0, 2.0],
  34. strides=[4, 8, 16, 32, 64]),
  35. bbox_coder=dict(
  36. type='DeltaXYWHBBoxCoder',
  37. target_means=[.0, .0, .0, .0],
  38. target_stds=[1.0, 1.0, 1.0, 1.0]),
  39. loss_cls=dict(
  40. type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
  41. loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
  42. roi_head=dict(
  43. type='CascadeRoIHead',
  44. num_stages=3,
  45. stage_loss_weights=[1, 0.5, 0.25],
  46. bbox_roi_extractor=dict(
  47. type='SingleRoIExtractor',
  48. roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
  49. out_channels=256,
  50. featmap_strides=[4, 8, 16, 32]),
  51. bbox_head=[
  52. dict(
  53. type='Shared2FCBBoxHead',
  54. in_channels=256,
  55. fc_out_channels=1024,
  56. roi_feat_size=7,
  57. num_classes=80,
  58. bbox_coder=dict(
  59. type='DeltaXYWHBBoxCoder',
  60. target_means=[0., 0., 0., 0.],
  61. target_stds=[0.1, 0.1, 0.2, 0.2]),
  62. reg_class_agnostic=True,
  63. loss_cls=dict(
  64. type='CrossEntropyLoss',
  65. use_sigmoid=False,
  66. loss_weight=1.0),
  67. loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
  68. loss_weight=1.0)),
  69. dict(
  70. type='Shared2FCBBoxHead',
  71. in_channels=256,
  72. fc_out_channels=1024,
  73. roi_feat_size=7,
  74. num_classes=80,
  75. bbox_coder=dict(
  76. type='DeltaXYWHBBoxCoder',
  77. target_means=[0., 0., 0., 0.],
  78. target_stds=[0.05, 0.05, 0.1, 0.1]),
  79. reg_class_agnostic=True,
  80. loss_cls=dict(
  81. type='CrossEntropyLoss',
  82. use_sigmoid=False,
  83. loss_weight=1.0),
  84. loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
  85. loss_weight=1.0)),
  86. dict(
  87. type='Shared2FCBBoxHead',
  88. in_channels=256,
  89. fc_out_channels=1024,
  90. roi_feat_size=7,
  91. num_classes=80,
  92. bbox_coder=dict(
  93. type='DeltaXYWHBBoxCoder',
  94. target_means=[0., 0., 0., 0.],
  95. target_stds=[0.033, 0.033, 0.067, 0.067]),
  96. reg_class_agnostic=True,
  97. loss_cls=dict(
  98. type='CrossEntropyLoss',
  99. use_sigmoid=False,
  100. loss_weight=1.0),
  101. loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
  102. ],
  103. mask_roi_extractor=dict(
  104. type='SingleRoIExtractor',
  105. roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
  106. out_channels=256,
  107. featmap_strides=[4, 8, 16, 32]),
  108. mask_head=dict(
  109. type='FCNMaskHead',
  110. num_convs=4,
  111. in_channels=256,
  112. conv_out_channels=256,
  113. num_classes=80,
  114. loss_mask=dict(
  115. type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
  116. # model training and testing settings
  117. train_cfg=dict(
  118. rpn=dict(
  119. assigner=dict(
  120. type='MaxIoUAssigner',
  121. pos_iou_thr=0.7,
  122. neg_iou_thr=0.3,
  123. min_pos_iou=0.3,
  124. match_low_quality=True,
  125. ignore_iof_thr=-1),
  126. sampler=dict(
  127. type='RandomSampler',
  128. num=256,
  129. pos_fraction=0.5,
  130. neg_pos_ub=-1,
  131. add_gt_as_proposals=False),
  132. allowed_border=0,
  133. pos_weight=-1,
  134. debug=False),
  135. rpn_proposal=dict(
  136. nms_pre=2000,
  137. max_per_img=2000,
  138. nms=dict(type='nms', iou_threshold=0.7),
  139. min_bbox_size=0),
  140. rcnn=[
  141. dict(
  142. assigner=dict(
  143. type='MaxIoUAssigner',
  144. pos_iou_thr=0.5,
  145. neg_iou_thr=0.5,
  146. min_pos_iou=0.5,
  147. match_low_quality=False,
  148. ignore_iof_thr=-1),
  149. sampler=dict(
  150. type='RandomSampler',
  151. num=512,
  152. pos_fraction=0.25,
  153. neg_pos_ub=-1,
  154. add_gt_as_proposals=True),
  155. mask_size=28,
  156. pos_weight=-1,
  157. debug=False),
  158. dict(
  159. assigner=dict(
  160. type='MaxIoUAssigner',
  161. pos_iou_thr=0.6,
  162. neg_iou_thr=0.6,
  163. min_pos_iou=0.6,
  164. match_low_quality=False,
  165. ignore_iof_thr=-1),
  166. sampler=dict(
  167. type='RandomSampler',
  168. num=512,
  169. pos_fraction=0.25,
  170. neg_pos_ub=-1,
  171. add_gt_as_proposals=True),
  172. mask_size=28,
  173. pos_weight=-1,
  174. debug=False),
  175. dict(
  176. assigner=dict(
  177. type='MaxIoUAssigner',
  178. pos_iou_thr=0.7,
  179. neg_iou_thr=0.7,
  180. min_pos_iou=0.7,
  181. match_low_quality=False,
  182. ignore_iof_thr=-1),
  183. sampler=dict(
  184. type='RandomSampler',
  185. num=512,
  186. pos_fraction=0.25,
  187. neg_pos_ub=-1,
  188. add_gt_as_proposals=True),
  189. mask_size=28,
  190. pos_weight=-1,
  191. debug=False)
  192. ]),
  193. test_cfg=dict(
  194. rpn=dict(
  195. nms_pre=1000,
  196. max_per_img=1000,
  197. nms=dict(type='nms', iou_threshold=0.7),
  198. min_bbox_size=0),
  199. rcnn=dict(
  200. score_thr=0.05,
  201. nms=dict(type='nms', iou_threshold=0.5),
  202. max_per_img=100,
  203. mask_thr_binary=0.5)))