# faster_rcnn_r50_fpn_1class.py

checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[8, 11])
total_epochs = 12
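# Standard COCO "1x" schedule: 12 epochs with the LR stepped down at epochs 8
# and 11 after a 500-iteration linear warmup. The base lr=0.02 assumes 8 GPUs
# x 2 images per GPU; scale it linearly for a different total batch size.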
model = dict(
    type='FasterRCNN',
    pretrained='torchvision://resnet50',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
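        # one anchor stride per FPN output level (num_outs=5 above)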
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
    roi_head=dict(
        type='StandardRoIHead',
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        bbox_head=dict(
            type='Shared2FCBBoxHead',
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=1,
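            # a single foreground class; MMDetection 2.x handles the
            # background class internally, so no "+1" is needed here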
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0., 0., 0., 0.],
                target_stds=[0.1, 0.1, 0.2, 0.2]),
            reg_class_agnostic=False,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
            loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
    # model training and testing settings
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=-1,
            pos_weight=-1,
            debug=False),
        rpn_proposal=dict(
            nms_pre=2000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                match_low_quality=False,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            pos_weight=-1,
            debug=False)),
    test_cfg=dict(
        rpn=dict(
            nms_pre=1000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100)
        # soft-nms is also supported for rcnn testing
        # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
    ))
dataset_type = 'CocoDataset'
data_root = 'data/coco'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
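# ImageNet mean/std in 0-255 RGB order, matching the torchvision-pretrained
# ResNet-50 backbone above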
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img']),
        ])
]
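# With a single img_scale and flip=False, MultiScaleFlipAug runs exactly one
# pass per image, i.e. no test-time augmentation.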
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=f'{data_root}/annotations/instances_train2017.json',
        img_prefix=f'{data_root}/train2017/',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=f'{data_root}/annotations/instances_val2017.json',
        img_prefix=f'{data_root}/val2017/',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=f'{data_root}/annotations/instances_val2017.json',
        img_prefix=f'{data_root}/val2017/',
        pipeline=test_pipeline))
evaluation = dict(interval=1, metric='bbox')
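
# Notes (assumptions about the surrounding setup, not part of the original config):
# - With num_classes=1, the COCO-style annotation files should contain a single
#   category; with the stock CocoDataset you would normally also pass
#   classes=('your_class',) in the train/val/test dicts (the name is a
#   placeholder) or register a custom dataset.
# - Typical launch commands from an MMDetection 2.x checkout, assuming this
#   file is saved under configs/:
#       python tools/train.py configs/faster_rcnn_r50_fpn_1class.py
#       python tools/test.py configs/faster_rcnn_r50_fpn_1class.py \
#           work_dirs/faster_rcnn_r50_fpn_1class/latest.pth --eval bbox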