# rtmdet-ins_l_8xb32-300e_coco.py (3.1 KB)

  1. _base_ = './rtmdet_l_8xb32-300e_coco.py'
  2. model = dict(
  3. bbox_head=dict(
  4. _delete_=True,
  5. type='RTMDetInsSepBNHead',
  6. num_classes=80,
  7. in_channels=256,
  8. stacked_convs=2,
  9. share_conv=True,
  10. pred_kernel_size=1,
  11. feat_channels=256,
  12. act_cfg=dict(type='SiLU', inplace=True),
  13. norm_cfg=dict(type='SyncBN', requires_grad=True),
  14. anchor_generator=dict(
  15. type='MlvlPointGenerator', offset=0, strides=[8, 16, 32]),
  16. bbox_coder=dict(type='DistancePointBBoxCoder'),
  17. loss_cls=dict(
  18. type='QualityFocalLoss',
  19. use_sigmoid=True,
  20. beta=2.0,
  21. loss_weight=1.0),
  22. loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
  23. loss_mask=dict(
  24. type='DiceLoss', loss_weight=2.0, eps=5e-6, reduction='mean')),
  25. test_cfg=dict(
  26. nms_pre=1000,
  27. min_bbox_size=0,
  28. score_thr=0.05,
  29. nms=dict(type='nms', iou_threshold=0.6),
  30. max_per_img=100,
  31. mask_thr_binary=0.5),
  32. )
  33. train_pipeline = [
  34. dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
  35. dict(
  36. type='LoadAnnotations',
  37. with_bbox=True,
  38. with_mask=True,
  39. poly2mask=False),
  40. dict(type='CachedMosaic', img_scale=(640, 640), pad_val=114.0),
  41. dict(
  42. type='RandomResize',
  43. scale=(1280, 1280),
  44. ratio_range=(0.1, 2.0),
  45. keep_ratio=True),
  46. dict(
  47. type='RandomCrop',
  48. crop_size=(640, 640),
  49. recompute_bbox=True,
  50. allow_negative_crop=True),
  51. dict(type='YOLOXHSVRandomAug'),
  52. dict(type='RandomFlip', prob=0.5),
  53. dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
  54. dict(
  55. type='CachedMixUp',
  56. img_scale=(640, 640),
  57. ratio_range=(1.0, 1.0),
  58. max_cached_images=20,
  59. pad_val=(114, 114, 114)),
  60. dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1)),
  61. dict(type='PackDetInputs')
  62. ]
  63. train_dataloader = dict(pin_memory=True, dataset=dict(pipeline=train_pipeline))
  64. train_pipeline_stage2 = [
  65. dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
  66. dict(
  67. type='LoadAnnotations',
  68. with_bbox=True,
  69. with_mask=True,
  70. poly2mask=False),
  71. dict(
  72. type='RandomResize',
  73. scale=(640, 640),
  74. ratio_range=(0.1, 2.0),
  75. keep_ratio=True),
  76. dict(
  77. type='RandomCrop',
  78. crop_size=(640, 640),
  79. recompute_bbox=True,
  80. allow_negative_crop=True),
  81. dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1)),
  82. dict(type='YOLOXHSVRandomAug'),
  83. dict(type='RandomFlip', prob=0.5),
  84. dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
  85. dict(type='PackDetInputs')
  86. ]
  87. custom_hooks = [
  88. dict(
  89. type='EMAHook',
  90. ema_type='ExpMomentumEMA',
  91. momentum=0.0002,
  92. update_buffers=True,
  93. priority=49),
  94. dict(
  95. type='PipelineSwitchHook',
  96. switch_epoch=280,
  97. switch_pipeline=train_pipeline_stage2)
  98. ]
  99. val_evaluator = dict(metric=['bbox', 'segm'])
  100. test_evaluator = val_evaluator