# grid-rcnn_r50_fpn_gn-head_2x_coco.py (4.9 KB)

  1. _base_ = [
  2. '../_base_/datasets/coco_detection.py', '../_base_/default_runtime.py'
  3. ]
  4. # model settings
  5. model = dict(
  6. type='GridRCNN',
  7. data_preprocessor=dict(
  8. type='DetDataPreprocessor',
  9. mean=[123.675, 116.28, 103.53],
  10. std=[58.395, 57.12, 57.375],
  11. bgr_to_rgb=True,
  12. pad_size_divisor=32),
  13. backbone=dict(
  14. type='ResNet',
  15. depth=50,
  16. num_stages=4,
  17. out_indices=(0, 1, 2, 3),
  18. frozen_stages=1,
  19. norm_cfg=dict(type='BN', requires_grad=True),
  20. norm_eval=True,
  21. style='pytorch',
  22. init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
  23. neck=dict(
  24. type='FPN',
  25. in_channels=[256, 512, 1024, 2048],
  26. out_channels=256,
  27. num_outs=5),
  28. rpn_head=dict(
  29. type='RPNHead',
  30. in_channels=256,
  31. feat_channels=256,
  32. anchor_generator=dict(
  33. type='AnchorGenerator',
  34. scales=[8],
  35. ratios=[0.5, 1.0, 2.0],
  36. strides=[4, 8, 16, 32, 64]),
  37. bbox_coder=dict(
  38. type='DeltaXYWHBBoxCoder',
  39. target_means=[.0, .0, .0, .0],
  40. target_stds=[1.0, 1.0, 1.0, 1.0]),
  41. loss_cls=dict(
  42. type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
  43. loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
  44. roi_head=dict(
  45. type='GridRoIHead',
  46. bbox_roi_extractor=dict(
  47. type='SingleRoIExtractor',
  48. roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
  49. out_channels=256,
  50. featmap_strides=[4, 8, 16, 32]),
  51. bbox_head=dict(
  52. type='Shared2FCBBoxHead',
  53. with_reg=False,
  54. in_channels=256,
  55. fc_out_channels=1024,
  56. roi_feat_size=7,
  57. num_classes=80,
  58. bbox_coder=dict(
  59. type='DeltaXYWHBBoxCoder',
  60. target_means=[0., 0., 0., 0.],
  61. target_stds=[0.1, 0.1, 0.2, 0.2]),
  62. reg_class_agnostic=False),
  63. grid_roi_extractor=dict(
  64. type='SingleRoIExtractor',
  65. roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
  66. out_channels=256,
  67. featmap_strides=[4, 8, 16, 32]),
  68. grid_head=dict(
  69. type='GridHead',
  70. grid_points=9,
  71. num_convs=8,
  72. in_channels=256,
  73. point_feat_channels=64,
  74. norm_cfg=dict(type='GN', num_groups=36),
  75. loss_grid=dict(
  76. type='CrossEntropyLoss', use_sigmoid=True, loss_weight=15))),
  77. # model training and testing settings
  78. train_cfg=dict(
  79. rpn=dict(
  80. assigner=dict(
  81. type='MaxIoUAssigner',
  82. pos_iou_thr=0.7,
  83. neg_iou_thr=0.3,
  84. min_pos_iou=0.3,
  85. ignore_iof_thr=-1),
  86. sampler=dict(
  87. type='RandomSampler',
  88. num=256,
  89. pos_fraction=0.5,
  90. neg_pos_ub=-1,
  91. add_gt_as_proposals=False),
  92. allowed_border=0,
  93. pos_weight=-1,
  94. debug=False),
  95. rpn_proposal=dict(
  96. nms_pre=2000,
  97. max_per_img=2000,
  98. nms=dict(type='nms', iou_threshold=0.7),
  99. min_bbox_size=0),
  100. rcnn=dict(
  101. assigner=dict(
  102. type='MaxIoUAssigner',
  103. pos_iou_thr=0.5,
  104. neg_iou_thr=0.5,
  105. min_pos_iou=0.5,
  106. ignore_iof_thr=-1),
  107. sampler=dict(
  108. type='RandomSampler',
  109. num=512,
  110. pos_fraction=0.25,
  111. neg_pos_ub=-1,
  112. add_gt_as_proposals=True),
  113. pos_radius=1,
  114. pos_weight=-1,
  115. max_num_grid=192,
  116. debug=False)),
  117. test_cfg=dict(
  118. rpn=dict(
  119. nms_pre=1000,
  120. max_per_img=1000,
  121. nms=dict(type='nms', iou_threshold=0.7),
  122. min_bbox_size=0),
  123. rcnn=dict(
  124. score_thr=0.03,
  125. nms=dict(type='nms', iou_threshold=0.3),
  126. max_per_img=100)))
  127. # optimizer
  128. optim_wrapper = dict(
  129. type='OptimWrapper',
  130. optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001))
  131. # training schedule
  132. max_epochs = 25
  133. train_cfg = dict(
  134. type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=1)
  135. val_cfg = dict(type='ValLoop')
  136. test_cfg = dict(type='TestLoop')
  137. # learning rate
  138. param_scheduler = [
  139. dict(
  140. type='LinearLR',
  141. start_factor=1.0 / 80,
  142. by_epoch=False,
  143. begin=0,
  144. end=3665),
  145. dict(
  146. type='MultiStepLR',
  147. begin=0,
  148. end=max_epochs,
  149. by_epoch=True,
  150. milestones=[17, 23],
  151. gamma=0.1)
  152. ]
  153. # Default setting for scaling LR automatically
  154. # - `enable` means enable scaling LR automatically
  155. # or not by default.
  156. # - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
  157. auto_scale_lr = dict(enable=False, base_batch_size=16)