# scnet_r50_fpn_1x_coco.py
  1. _base_ = '../htc/htc_r50_fpn_1x_coco.py'
  2. # model settings
  3. model = dict(
  4. type='SCNet',
  5. roi_head=dict(
  6. _delete_=True,
  7. type='SCNetRoIHead',
  8. num_stages=3,
  9. stage_loss_weights=[1, 0.5, 0.25],
  10. bbox_roi_extractor=dict(
  11. type='SingleRoIExtractor',
  12. roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
  13. out_channels=256,
  14. featmap_strides=[4, 8, 16, 32]),
  15. bbox_head=[
  16. dict(
  17. type='SCNetBBoxHead',
  18. num_shared_fcs=2,
  19. in_channels=256,
  20. fc_out_channels=1024,
  21. roi_feat_size=7,
  22. num_classes=80,
  23. bbox_coder=dict(
  24. type='DeltaXYWHBBoxCoder',
  25. target_means=[0., 0., 0., 0.],
  26. target_stds=[0.1, 0.1, 0.2, 0.2]),
  27. reg_class_agnostic=True,
  28. loss_cls=dict(
  29. type='CrossEntropyLoss',
  30. use_sigmoid=False,
  31. loss_weight=1.0),
  32. loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
  33. loss_weight=1.0)),
  34. dict(
  35. type='SCNetBBoxHead',
  36. num_shared_fcs=2,
  37. in_channels=256,
  38. fc_out_channels=1024,
  39. roi_feat_size=7,
  40. num_classes=80,
  41. bbox_coder=dict(
  42. type='DeltaXYWHBBoxCoder',
  43. target_means=[0., 0., 0., 0.],
  44. target_stds=[0.05, 0.05, 0.1, 0.1]),
  45. reg_class_agnostic=True,
  46. loss_cls=dict(
  47. type='CrossEntropyLoss',
  48. use_sigmoid=False,
  49. loss_weight=1.0),
  50. loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
  51. loss_weight=1.0)),
  52. dict(
  53. type='SCNetBBoxHead',
  54. num_shared_fcs=2,
  55. in_channels=256,
  56. fc_out_channels=1024,
  57. roi_feat_size=7,
  58. num_classes=80,
  59. bbox_coder=dict(
  60. type='DeltaXYWHBBoxCoder',
  61. target_means=[0., 0., 0., 0.],
  62. target_stds=[0.033, 0.033, 0.067, 0.067]),
  63. reg_class_agnostic=True,
  64. loss_cls=dict(
  65. type='CrossEntropyLoss',
  66. use_sigmoid=False,
  67. loss_weight=1.0),
  68. loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
  69. ],
  70. mask_roi_extractor=dict(
  71. type='SingleRoIExtractor',
  72. roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
  73. out_channels=256,
  74. featmap_strides=[4, 8, 16, 32]),
  75. mask_head=dict(
  76. type='SCNetMaskHead',
  77. num_convs=12,
  78. in_channels=256,
  79. conv_out_channels=256,
  80. num_classes=80,
  81. conv_to_res=True,
  82. loss_mask=dict(
  83. type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)),
  84. semantic_roi_extractor=dict(
  85. type='SingleRoIExtractor',
  86. roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
  87. out_channels=256,
  88. featmap_strides=[8]),
  89. semantic_head=dict(
  90. type='SCNetSemanticHead',
  91. num_ins=5,
  92. fusion_level=1,
  93. seg_scale_factor=1 / 8,
  94. num_convs=4,
  95. in_channels=256,
  96. conv_out_channels=256,
  97. num_classes=183,
  98. loss_seg=dict(
  99. type='CrossEntropyLoss', ignore_index=255, loss_weight=0.2),
  100. conv_to_res=True),
  101. glbctx_head=dict(
  102. type='GlobalContextHead',
  103. num_convs=4,
  104. in_channels=256,
  105. conv_out_channels=256,
  106. num_classes=80,
  107. loss_weight=3.0,
  108. conv_to_res=True),
  109. feat_relay_head=dict(
  110. type='FeatureRelayHead',
  111. in_channels=1024,
  112. out_conv_channels=256,
  113. roi_feat_size=7,
  114. scale_factor=2)))
# TODO: uncomment the code below to enable test-time augmentation.
# img_norm_cfg = dict(
#     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# test_pipeline = [
#     dict(type='LoadImageFromFile'),
#     dict(
#         type='MultiScaleFlipAug',
#         img_scale=[(600, 900), (800, 1200), (1000, 1500), (1200, 1800),
#                    (1400, 2100)],
#         flip=True,
#         transforms=[
#             dict(type='Resize', keep_ratio=True),
#             dict(type='RandomFlip', flip_ratio=0.5),
#             dict(type='Normalize', **img_norm_cfg),
#             dict(type='Pad', size_divisor=32),
#             dict(type='ImageToTensor', keys=['img']),
#             dict(type='Collect', keys=['img']),
#         ])
# ]
# data = dict(
#     val=dict(pipeline=test_pipeline),
#     test=dict(pipeline=test_pipeline))