# htc-without-semantic_r50_fpn_1x_coco.py (7.7 KB)

# (Code-viewer line-number gutter for lines 1-223 removed; it is not part of the config.)
  1. _base_ = [
  2. '../_base_/datasets/coco_instance.py',
  3. '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
  4. ]
  5. # model settings
  6. model = dict(
  7. type='HybridTaskCascade',
  8. data_preprocessor=dict(
  9. type='DetDataPreprocessor',
  10. mean=[123.675, 116.28, 103.53],
  11. std=[58.395, 57.12, 57.375],
  12. bgr_to_rgb=True,
  13. pad_size_divisor=32),
  14. backbone=dict(
  15. type='ResNet',
  16. depth=50,
  17. num_stages=4,
  18. out_indices=(0, 1, 2, 3),
  19. frozen_stages=1,
  20. norm_cfg=dict(type='BN', requires_grad=True),
  21. norm_eval=True,
  22. style='pytorch',
  23. init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
  24. neck=dict(
  25. type='FPN',
  26. in_channels=[256, 512, 1024, 2048],
  27. out_channels=256,
  28. num_outs=5),
  29. rpn_head=dict(
  30. type='RPNHead',
  31. in_channels=256,
  32. feat_channels=256,
  33. anchor_generator=dict(
  34. type='AnchorGenerator',
  35. scales=[8],
  36. ratios=[0.5, 1.0, 2.0],
  37. strides=[4, 8, 16, 32, 64]),
  38. bbox_coder=dict(
  39. type='DeltaXYWHBBoxCoder',
  40. target_means=[.0, .0, .0, .0],
  41. target_stds=[1.0, 1.0, 1.0, 1.0]),
  42. loss_cls=dict(
  43. type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
  44. loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
  45. roi_head=dict(
  46. type='HybridTaskCascadeRoIHead',
  47. interleaved=True,
  48. mask_info_flow=True,
  49. num_stages=3,
  50. stage_loss_weights=[1, 0.5, 0.25],
  51. bbox_roi_extractor=dict(
  52. type='SingleRoIExtractor',
  53. roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
  54. out_channels=256,
  55. featmap_strides=[4, 8, 16, 32]),
  56. bbox_head=[
  57. dict(
  58. type='Shared2FCBBoxHead',
  59. in_channels=256,
  60. fc_out_channels=1024,
  61. roi_feat_size=7,
  62. num_classes=80,
  63. bbox_coder=dict(
  64. type='DeltaXYWHBBoxCoder',
  65. target_means=[0., 0., 0., 0.],
  66. target_stds=[0.1, 0.1, 0.2, 0.2]),
  67. reg_class_agnostic=True,
  68. loss_cls=dict(
  69. type='CrossEntropyLoss',
  70. use_sigmoid=False,
  71. loss_weight=1.0),
  72. loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
  73. loss_weight=1.0)),
  74. dict(
  75. type='Shared2FCBBoxHead',
  76. in_channels=256,
  77. fc_out_channels=1024,
  78. roi_feat_size=7,
  79. num_classes=80,
  80. bbox_coder=dict(
  81. type='DeltaXYWHBBoxCoder',
  82. target_means=[0., 0., 0., 0.],
  83. target_stds=[0.05, 0.05, 0.1, 0.1]),
  84. reg_class_agnostic=True,
  85. loss_cls=dict(
  86. type='CrossEntropyLoss',
  87. use_sigmoid=False,
  88. loss_weight=1.0),
  89. loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
  90. loss_weight=1.0)),
  91. dict(
  92. type='Shared2FCBBoxHead',
  93. in_channels=256,
  94. fc_out_channels=1024,
  95. roi_feat_size=7,
  96. num_classes=80,
  97. bbox_coder=dict(
  98. type='DeltaXYWHBBoxCoder',
  99. target_means=[0., 0., 0., 0.],
  100. target_stds=[0.033, 0.033, 0.067, 0.067]),
  101. reg_class_agnostic=True,
  102. loss_cls=dict(
  103. type='CrossEntropyLoss',
  104. use_sigmoid=False,
  105. loss_weight=1.0),
  106. loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
  107. ],
  108. mask_roi_extractor=dict(
  109. type='SingleRoIExtractor',
  110. roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
  111. out_channels=256,
  112. featmap_strides=[4, 8, 16, 32]),
  113. mask_head=[
  114. dict(
  115. type='HTCMaskHead',
  116. with_conv_res=False,
  117. num_convs=4,
  118. in_channels=256,
  119. conv_out_channels=256,
  120. num_classes=80,
  121. loss_mask=dict(
  122. type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)),
  123. dict(
  124. type='HTCMaskHead',
  125. num_convs=4,
  126. in_channels=256,
  127. conv_out_channels=256,
  128. num_classes=80,
  129. loss_mask=dict(
  130. type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)),
  131. dict(
  132. type='HTCMaskHead',
  133. num_convs=4,
  134. in_channels=256,
  135. conv_out_channels=256,
  136. num_classes=80,
  137. loss_mask=dict(
  138. type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))
  139. ]),
  140. # model training and testing settings
  141. train_cfg=dict(
  142. rpn=dict(
  143. assigner=dict(
  144. type='MaxIoUAssigner',
  145. pos_iou_thr=0.7,
  146. neg_iou_thr=0.3,
  147. min_pos_iou=0.3,
  148. ignore_iof_thr=-1),
  149. sampler=dict(
  150. type='RandomSampler',
  151. num=256,
  152. pos_fraction=0.5,
  153. neg_pos_ub=-1,
  154. add_gt_as_proposals=False),
  155. allowed_border=0,
  156. pos_weight=-1,
  157. debug=False),
  158. rpn_proposal=dict(
  159. nms_pre=2000,
  160. max_per_img=2000,
  161. nms=dict(type='nms', iou_threshold=0.7),
  162. min_bbox_size=0),
  163. rcnn=[
  164. dict(
  165. assigner=dict(
  166. type='MaxIoUAssigner',
  167. pos_iou_thr=0.5,
  168. neg_iou_thr=0.5,
  169. min_pos_iou=0.5,
  170. ignore_iof_thr=-1),
  171. sampler=dict(
  172. type='RandomSampler',
  173. num=512,
  174. pos_fraction=0.25,
  175. neg_pos_ub=-1,
  176. add_gt_as_proposals=True),
  177. mask_size=28,
  178. pos_weight=-1,
  179. debug=False),
  180. dict(
  181. assigner=dict(
  182. type='MaxIoUAssigner',
  183. pos_iou_thr=0.6,
  184. neg_iou_thr=0.6,
  185. min_pos_iou=0.6,
  186. ignore_iof_thr=-1),
  187. sampler=dict(
  188. type='RandomSampler',
  189. num=512,
  190. pos_fraction=0.25,
  191. neg_pos_ub=-1,
  192. add_gt_as_proposals=True),
  193. mask_size=28,
  194. pos_weight=-1,
  195. debug=False),
  196. dict(
  197. assigner=dict(
  198. type='MaxIoUAssigner',
  199. pos_iou_thr=0.7,
  200. neg_iou_thr=0.7,
  201. min_pos_iou=0.7,
  202. ignore_iof_thr=-1),
  203. sampler=dict(
  204. type='RandomSampler',
  205. num=512,
  206. pos_fraction=0.25,
  207. neg_pos_ub=-1,
  208. add_gt_as_proposals=True),
  209. mask_size=28,
  210. pos_weight=-1,
  211. debug=False)
  212. ]),
  213. test_cfg=dict(
  214. rpn=dict(
  215. nms_pre=1000,
  216. max_per_img=1000,
  217. nms=dict(type='nms', iou_threshold=0.7),
  218. min_bbox_size=0),
  219. rcnn=dict(
  220. score_thr=0.001,
  221. nms=dict(type='nms', iou_threshold=0.5),
  222. max_per_img=100,
  223. mask_thr_binary=0.5)))