# mask-rcnn_r50_fpn.py 4.2 KB

# (Line-number gutter 1-127 from the file viewer; not part of the config.)
  1. # model settings
  2. model = dict(
  3. type='MaskRCNN',
  4. data_preprocessor=dict(
  5. type='DetDataPreprocessor',
  6. mean=[123.675, 116.28, 103.53],
  7. std=[58.395, 57.12, 57.375],
  8. bgr_to_rgb=True,
  9. pad_mask=True,
  10. pad_size_divisor=32),
  11. backbone=dict(
  12. type='ResNet',
  13. depth=50,
  14. num_stages=4,
  15. out_indices=(0, 1, 2, 3),
  16. frozen_stages=1,
  17. norm_cfg=dict(type='BN', requires_grad=True),
  18. norm_eval=True,
  19. style='pytorch',
  20. init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
  21. neck=dict(
  22. type='FPN',
  23. in_channels=[256, 512, 1024, 2048],
  24. out_channels=256,
  25. num_outs=5),
  26. rpn_head=dict(
  27. type='RPNHead',
  28. in_channels=256,
  29. feat_channels=256,
  30. anchor_generator=dict(
  31. type='AnchorGenerator',
  32. scales=[8],
  33. ratios=[0.5, 1.0, 2.0],
  34. strides=[4, 8, 16, 32, 64]),
  35. bbox_coder=dict(
  36. type='DeltaXYWHBBoxCoder',
  37. target_means=[.0, .0, .0, .0],
  38. target_stds=[1.0, 1.0, 1.0, 1.0]),
  39. loss_cls=dict(
  40. type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
  41. loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
  42. roi_head=dict(
  43. type='StandardRoIHead',
  44. bbox_roi_extractor=dict(
  45. type='SingleRoIExtractor',
  46. roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
  47. out_channels=256,
  48. featmap_strides=[4, 8, 16, 32]),
  49. bbox_head=dict(
  50. type='Shared2FCBBoxHead',
  51. in_channels=256,
  52. fc_out_channels=1024,
  53. roi_feat_size=7,
  54. num_classes=80,
  55. bbox_coder=dict(
  56. type='DeltaXYWHBBoxCoder',
  57. target_means=[0., 0., 0., 0.],
  58. target_stds=[0.1, 0.1, 0.2, 0.2]),
  59. reg_class_agnostic=False,
  60. loss_cls=dict(
  61. type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
  62. loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
  63. mask_roi_extractor=dict(
  64. type='SingleRoIExtractor',
  65. roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
  66. out_channels=256,
  67. featmap_strides=[4, 8, 16, 32]),
  68. mask_head=dict(
  69. type='FCNMaskHead',
  70. num_convs=4,
  71. in_channels=256,
  72. conv_out_channels=256,
  73. num_classes=80,
  74. loss_mask=dict(
  75. type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
  76. # model training and testing settings
  77. train_cfg=dict(
  78. rpn=dict(
  79. assigner=dict(
  80. type='MaxIoUAssigner',
  81. pos_iou_thr=0.7,
  82. neg_iou_thr=0.3,
  83. min_pos_iou=0.3,
  84. match_low_quality=True,
  85. ignore_iof_thr=-1),
  86. sampler=dict(
  87. type='RandomSampler',
  88. num=256,
  89. pos_fraction=0.5,
  90. neg_pos_ub=-1,
  91. add_gt_as_proposals=False),
  92. allowed_border=-1,
  93. pos_weight=-1,
  94. debug=False),
  95. rpn_proposal=dict(
  96. nms_pre=2000,
  97. max_per_img=1000,
  98. nms=dict(type='nms', iou_threshold=0.7),
  99. min_bbox_size=0),
  100. rcnn=dict(
  101. assigner=dict(
  102. type='MaxIoUAssigner',
  103. pos_iou_thr=0.5,
  104. neg_iou_thr=0.5,
  105. min_pos_iou=0.5,
  106. match_low_quality=True,
  107. ignore_iof_thr=-1),
  108. sampler=dict(
  109. type='RandomSampler',
  110. num=512,
  111. pos_fraction=0.25,
  112. neg_pos_ub=-1,
  113. add_gt_as_proposals=True),
  114. mask_size=28,
  115. pos_weight=-1,
  116. debug=False)),
  117. test_cfg=dict(
  118. rpn=dict(
  119. nms_pre=1000,
  120. max_per_img=1000,
  121. nms=dict(type='nms', iou_threshold=0.7),
  122. min_bbox_size=0),
  123. rcnn=dict(
  124. score_thr=0.05,
  125. nms=dict(type='nms', iou_threshold=0.5),
  126. max_per_img=100,
  127. mask_thr_binary=0.5)))