# cascade_rcnn_x101_64x4d_fpn_1class.py

# runtime settings
default_scope = 'mmdet'
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=50),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(type='CheckpointHook', interval=1),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='DetVisualizationHook'))
env_cfg = dict(
    cudnn_benchmark=False,
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
    dist_cfg=dict(backend='nccl'),
)
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
log_level = 'INFO'
load_from = None
resume = False
# model settings
model = dict(
    type='CascadeRCNN',
    data_preprocessor=dict(
        type='DetDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True,
        pad_mask=True,
        pad_size_divisor=32),
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=64,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
    roi_head=dict(
        type='CascadeRoIHead',
        num_stages=3,
        stage_loss_weights=[1, 0.5, 0.25],
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        # Three cascade stages, each with a single foreground class
        # (num_classes=1) and progressively tighter regression target stds.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=1,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=1,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=1,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]),
    # model training and testing settings
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=0,
            pos_weight=-1,
            debug=False),
        rpn_proposal=dict(
            nms_pre=2000,
            max_per_img=2000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        # Per-stage R-CNN assigners with increasing IoU thresholds
        # (0.5 -> 0.6 -> 0.7).
        rcnn=[
            dict(
                assigner=dict(
                    type='MaxIoUAssigner',
                    pos_iou_thr=0.5,
                    neg_iou_thr=0.5,
                    min_pos_iou=0.5,
                    match_low_quality=False,
                    ignore_iof_thr=-1),
                sampler=dict(
                    type='RandomSampler',
                    num=512,
                    pos_fraction=0.25,
                    neg_pos_ub=-1,
                    add_gt_as_proposals=True),
                pos_weight=-1,
                debug=False),
            dict(
                assigner=dict(
                    type='MaxIoUAssigner',
                    pos_iou_thr=0.6,
                    neg_iou_thr=0.6,
                    min_pos_iou=0.6,
                    match_low_quality=False,
                    ignore_iof_thr=-1),
                sampler=dict(
                    type='RandomSampler',
                    num=512,
                    pos_fraction=0.25,
                    neg_pos_ub=-1,
                    add_gt_as_proposals=True),
                pos_weight=-1,
                debug=False),
            dict(
                assigner=dict(
                    type='MaxIoUAssigner',
                    pos_iou_thr=0.7,
                    neg_iou_thr=0.7,
                    min_pos_iou=0.7,
                    match_low_quality=False,
                    ignore_iof_thr=-1),
                sampler=dict(
                    type='RandomSampler',
                    num=512,
                    pos_fraction=0.25,
                    neg_pos_ub=-1,
                    add_gt_as_proposals=True),
                pos_weight=-1,
                debug=False)
        ]),
    test_cfg=dict(
        rpn=dict(
            nms_pre=1000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100)))
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PackDetInputs')
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=(1333, 800), keep_ratio=True),
    # If there are no gt annotations, remove this step from the pipeline.
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]
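# Note (assumption, not taken from the original file): with num_classes=1 in
# the model above, a CocoDataset whose annotation categories differ from the
# default 80 COCO classes usually also needs a matching single-class
# metainfo, e.g. (class name is a placeholder):
#   metainfo = dict(classes=('your_class', ))
# passed as `metainfo=metainfo` in each dataset dict below.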
train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    batch_sampler=dict(type='AspectRatioBatchSampler'),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='annotations/instances_train2017.json',
        data_prefix=dict(img='train2017/'),
        filter_cfg=dict(filter_empty_gt=True, min_size=32),
        pipeline=train_pipeline))
val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='annotations/instances_val2017.json',
        data_prefix=dict(img='val2017/'),
        test_mode=True,
        pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + 'annotations/instances_val2017.json',
    metric='bbox',
    format_only=False)
test_evaluator = val_evaluator
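# The settings above cover model, data and runtime only; a runnable training
# setup also needs loop, optimizer and LR-schedule entries. A minimal sketch,
# assuming the standard MMDetection 3.x "1x" schedule (these values are an
# assumption, not part of the original file):
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=1)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001))
param_scheduler = [
    dict(type='LinearLR', start_factor=0.001, by_epoch=False, begin=0,
         end=500),
    dict(
        type='MultiStepLR',
        begin=0,
        end=12,
        by_epoch=True,
        milestones=[8, 11],
        gamma=0.1)
]
# Typical entry points (config path assumed, adjust to where this file lives):
#   python tools/train.py cascade_rcnn_x101_64x4d_fpn_1class.py
#   python tools/test.py cascade_rcnn_x101_64x4d_fpn_1class.py <checkpoint.pth>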