# vfnet_r50_fpn_1x_coco.py
# VarifocalNet (VFNet) with ResNet-50 + FPN backbone, 1x (12 epoch) COCO schedule.
  1. _base_ = [
  2. '../_base_/datasets/coco_detection.py',
  3. '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
  4. ]
  5. # model settings
  6. model = dict(
  7. type='VFNet',
  8. data_preprocessor=dict(
  9. type='DetDataPreprocessor',
  10. mean=[123.675, 116.28, 103.53],
  11. std=[58.395, 57.12, 57.375],
  12. bgr_to_rgb=True,
  13. pad_size_divisor=32),
  14. backbone=dict(
  15. type='ResNet',
  16. depth=50,
  17. num_stages=4,
  18. out_indices=(0, 1, 2, 3),
  19. frozen_stages=1,
  20. norm_cfg=dict(type='BN', requires_grad=True),
  21. norm_eval=True,
  22. style='pytorch',
  23. init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
  24. neck=dict(
  25. type='FPN',
  26. in_channels=[256, 512, 1024, 2048],
  27. out_channels=256,
  28. start_level=1,
  29. add_extra_convs='on_output', # use P5
  30. num_outs=5,
  31. relu_before_extra_convs=True),
  32. bbox_head=dict(
  33. type='VFNetHead',
  34. num_classes=80,
  35. in_channels=256,
  36. stacked_convs=3,
  37. feat_channels=256,
  38. strides=[8, 16, 32, 64, 128],
  39. center_sampling=False,
  40. dcn_on_last_conv=False,
  41. use_atss=True,
  42. use_vfl=True,
  43. loss_cls=dict(
  44. type='VarifocalLoss',
  45. use_sigmoid=True,
  46. alpha=0.75,
  47. gamma=2.0,
  48. iou_weighted=True,
  49. loss_weight=1.0),
  50. loss_bbox=dict(type='GIoULoss', loss_weight=1.5),
  51. loss_bbox_refine=dict(type='GIoULoss', loss_weight=2.0)),
  52. # training and testing settings
  53. train_cfg=dict(
  54. assigner=dict(type='ATSSAssigner', topk=9),
  55. allowed_border=-1,
  56. pos_weight=-1,
  57. debug=False),
  58. test_cfg=dict(
  59. nms_pre=1000,
  60. min_bbox_size=0,
  61. score_thr=0.05,
  62. nms=dict(type='nms', iou_threshold=0.6),
  63. max_per_img=100))
  64. # data setting
  65. train_pipeline = [
  66. dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
  67. dict(type='LoadAnnotations', with_bbox=True),
  68. dict(type='Resize', scale=(1333, 800), keep_ratio=True),
  69. dict(type='RandomFlip', prob=0.5),
  70. dict(type='PackDetInputs')
  71. ]
  72. test_pipeline = [
  73. dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
  74. dict(type='Resize', scale=(1333, 800), keep_ratio=True),
  75. dict(type='LoadAnnotations', with_bbox=True),
  76. dict(
  77. type='PackDetInputs',
  78. meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
  79. 'scale_factor'))
  80. ]
  81. train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
  82. val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
  83. test_dataloader = val_dataloader
  84. # optimizer
  85. optim_wrapper = dict(
  86. optimizer=dict(lr=0.01),
  87. paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.),
  88. clip_grad=None)
  89. # learning rate
  90. max_epochs = 12
  91. param_scheduler = [
  92. dict(type='LinearLR', start_factor=0.1, by_epoch=False, begin=0, end=500),
  93. dict(
  94. type='MultiStepLR',
  95. begin=0,
  96. end=max_epochs,
  97. by_epoch=True,
  98. milestones=[8, 11],
  99. gamma=0.1)
  100. ]
  101. train_cfg = dict(max_epochs=max_epochs)