# File: cascade-rcnn_r50_fpn.py (6.4 KB)

  1. # model settings
  2. model = dict(
  3. type='CascadeRCNN',
  4. data_preprocessor=dict(
  5. type='DetDataPreprocessor',
  6. mean=[123.675, 116.28, 103.53],
  7. std=[58.395, 57.12, 57.375],
  8. bgr_to_rgb=True,
  9. pad_size_divisor=32),
  10. backbone=dict(
  11. type='ResNet',
  12. depth=50,
  13. num_stages=4,
  14. out_indices=(0, 1, 2, 3),
  15. frozen_stages=1,
  16. norm_cfg=dict(type='BN', requires_grad=True),
  17. norm_eval=True,
  18. style='pytorch',
  19. init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
  20. neck=dict(
  21. type='FPN',
  22. in_channels=[256, 512, 1024, 2048],
  23. out_channels=256,
  24. num_outs=5),
  25. rpn_head=dict(
  26. type='RPNHead',
  27. in_channels=256,
  28. feat_channels=256,
  29. anchor_generator=dict(
  30. type='AnchorGenerator',
  31. scales=[8],
  32. ratios=[0.5, 1.0, 2.0],
  33. strides=[4, 8, 16, 32, 64]),
  34. bbox_coder=dict(
  35. type='DeltaXYWHBBoxCoder',
  36. target_means=[.0, .0, .0, .0],
  37. target_stds=[1.0, 1.0, 1.0, 1.0]),
  38. loss_cls=dict(
  39. type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
  40. loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
  41. roi_head=dict(
  42. type='CascadeRoIHead',
  43. num_stages=3,
  44. stage_loss_weights=[1, 0.5, 0.25],
  45. bbox_roi_extractor=dict(
  46. type='SingleRoIExtractor',
  47. roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
  48. out_channels=256,
  49. featmap_strides=[4, 8, 16, 32]),
  50. bbox_head=[
  51. dict(
  52. type='Shared2FCBBoxHead',
  53. in_channels=256,
  54. fc_out_channels=1024,
  55. roi_feat_size=7,
  56. num_classes=80,
  57. bbox_coder=dict(
  58. type='DeltaXYWHBBoxCoder',
  59. target_means=[0., 0., 0., 0.],
  60. target_stds=[0.1, 0.1, 0.2, 0.2]),
  61. reg_class_agnostic=True,
  62. loss_cls=dict(
  63. type='CrossEntropyLoss',
  64. use_sigmoid=False,
  65. loss_weight=1.0),
  66. loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
  67. loss_weight=1.0)),
  68. dict(
  69. type='Shared2FCBBoxHead',
  70. in_channels=256,
  71. fc_out_channels=1024,
  72. roi_feat_size=7,
  73. num_classes=80,
  74. bbox_coder=dict(
  75. type='DeltaXYWHBBoxCoder',
  76. target_means=[0., 0., 0., 0.],
  77. target_stds=[0.05, 0.05, 0.1, 0.1]),
  78. reg_class_agnostic=True,
  79. loss_cls=dict(
  80. type='CrossEntropyLoss',
  81. use_sigmoid=False,
  82. loss_weight=1.0),
  83. loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
  84. loss_weight=1.0)),
  85. dict(
  86. type='Shared2FCBBoxHead',
  87. in_channels=256,
  88. fc_out_channels=1024,
  89. roi_feat_size=7,
  90. num_classes=80,
  91. bbox_coder=dict(
  92. type='DeltaXYWHBBoxCoder',
  93. target_means=[0., 0., 0., 0.],
  94. target_stds=[0.033, 0.033, 0.067, 0.067]),
  95. reg_class_agnostic=True,
  96. loss_cls=dict(
  97. type='CrossEntropyLoss',
  98. use_sigmoid=False,
  99. loss_weight=1.0),
  100. loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
  101. ]),
  102. # model training and testing settings
  103. train_cfg=dict(
  104. rpn=dict(
  105. assigner=dict(
  106. type='MaxIoUAssigner',
  107. pos_iou_thr=0.7,
  108. neg_iou_thr=0.3,
  109. min_pos_iou=0.3,
  110. match_low_quality=True,
  111. ignore_iof_thr=-1),
  112. sampler=dict(
  113. type='RandomSampler',
  114. num=256,
  115. pos_fraction=0.5,
  116. neg_pos_ub=-1,
  117. add_gt_as_proposals=False),
  118. allowed_border=0,
  119. pos_weight=-1,
  120. debug=False),
  121. rpn_proposal=dict(
  122. nms_pre=2000,
  123. max_per_img=2000,
  124. nms=dict(type='nms', iou_threshold=0.7),
  125. min_bbox_size=0),
  126. rcnn=[
  127. dict(
  128. assigner=dict(
  129. type='MaxIoUAssigner',
  130. pos_iou_thr=0.5,
  131. neg_iou_thr=0.5,
  132. min_pos_iou=0.5,
  133. match_low_quality=False,
  134. ignore_iof_thr=-1),
  135. sampler=dict(
  136. type='RandomSampler',
  137. num=512,
  138. pos_fraction=0.25,
  139. neg_pos_ub=-1,
  140. add_gt_as_proposals=True),
  141. pos_weight=-1,
  142. debug=False),
  143. dict(
  144. assigner=dict(
  145. type='MaxIoUAssigner',
  146. pos_iou_thr=0.6,
  147. neg_iou_thr=0.6,
  148. min_pos_iou=0.6,
  149. match_low_quality=False,
  150. ignore_iof_thr=-1),
  151. sampler=dict(
  152. type='RandomSampler',
  153. num=512,
  154. pos_fraction=0.25,
  155. neg_pos_ub=-1,
  156. add_gt_as_proposals=True),
  157. pos_weight=-1,
  158. debug=False),
  159. dict(
  160. assigner=dict(
  161. type='MaxIoUAssigner',
  162. pos_iou_thr=0.7,
  163. neg_iou_thr=0.7,
  164. min_pos_iou=0.7,
  165. match_low_quality=False,
  166. ignore_iof_thr=-1),
  167. sampler=dict(
  168. type='RandomSampler',
  169. num=512,
  170. pos_fraction=0.25,
  171. neg_pos_ub=-1,
  172. add_gt_as_proposals=True),
  173. pos_weight=-1,
  174. debug=False)
  175. ]),
  176. test_cfg=dict(
  177. rpn=dict(
  178. nms_pre=1000,
  179. max_per_img=1000,
  180. nms=dict(type='nms', iou_threshold=0.7),
  181. min_bbox_size=0),
  182. rcnn=dict(
  183. score_thr=0.05,
  184. nms=dict(type='nms', iou_threshold=0.5),
  185. max_per_img=100)))