# retinanet_r50_fpn.py (2.0 KB)

  1. # model settings
  2. model = dict(
  3. type='RetinaNet',
  4. data_preprocessor=dict(
  5. type='DetDataPreprocessor',
  6. mean=[123.675, 116.28, 103.53],
  7. std=[58.395, 57.12, 57.375],
  8. bgr_to_rgb=True,
  9. pad_size_divisor=32),
  10. backbone=dict(
  11. type='ResNet',
  12. depth=50,
  13. num_stages=4,
  14. out_indices=(0, 1, 2, 3),
  15. frozen_stages=1,
  16. norm_cfg=dict(type='BN', requires_grad=True),
  17. norm_eval=True,
  18. style='pytorch',
  19. init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
  20. neck=dict(
  21. type='FPN',
  22. in_channels=[256, 512, 1024, 2048],
  23. out_channels=256,
  24. start_level=1,
  25. add_extra_convs='on_input',
  26. num_outs=5),
  27. bbox_head=dict(
  28. type='RetinaHead',
  29. num_classes=80,
  30. in_channels=256,
  31. stacked_convs=4,
  32. feat_channels=256,
  33. anchor_generator=dict(
  34. type='AnchorGenerator',
  35. octave_base_scale=4,
  36. scales_per_octave=3,
  37. ratios=[0.5, 1.0, 2.0],
  38. strides=[8, 16, 32, 64, 128]),
  39. bbox_coder=dict(
  40. type='DeltaXYWHBBoxCoder',
  41. target_means=[.0, .0, .0, .0],
  42. target_stds=[1.0, 1.0, 1.0, 1.0]),
  43. loss_cls=dict(
  44. type='FocalLoss',
  45. use_sigmoid=True,
  46. gamma=2.0,
  47. alpha=0.25,
  48. loss_weight=1.0),
  49. loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
  50. # model training and testing settings
  51. train_cfg=dict(
  52. assigner=dict(
  53. type='MaxIoUAssigner',
  54. pos_iou_thr=0.5,
  55. neg_iou_thr=0.4,
  56. min_pos_iou=0,
  57. ignore_iof_thr=-1),
  58. sampler=dict(
  59. type='PseudoSampler'), # Focal loss should use PseudoSampler
  60. allowed_border=-1,
  61. pos_weight=-1,
  62. debug=False),
  63. test_cfg=dict(
  64. nms_pre=1000,
  65. min_bbox_size=0,
  66. score_thr=0.05,
  67. nms=dict(type='nms', iou_threshold=0.5),
  68. max_per_img=100))